media-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,402 @@
1
+ """Fast frame extraction using OpenCV or FFmpeg.
2
+
3
+ OpenCV's VideoCapture is fast for normal videos but decodes at full resolution.
4
+ For high-resolution videos (4K+), FFmpeg decoding at target resolution is faster.
5
+
6
+ Also supports direct image loading - when given an image file, it loads it
7
+ directly instead of trying to use VideoCapture.
8
+ """
9
+
10
+ import logging
11
+ import os
12
+ import subprocess
13
+ import tempfile
14
+
15
+ import cv2
16
+ import numpy as np
17
+
18
+ from media_engine.schemas import MediaType, get_media_type
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # Resolution threshold for using FFmpeg decode (4K+)
23
+ HIGH_RES_THRESHOLD = 3840 * 2160 # ~8.3M pixels
24
+
25
+
26
+ class FrameExtractor:
27
+ """Extract frames from video or image using OpenCV.
28
+
29
+ Uses cv2.VideoCapture for fast seeking and frame extraction from videos.
30
+ Falls back to ffmpeg for exotic codecs that OpenCV can't handle.
31
+ For images, loads directly with cv2.imread (no frame extraction needed).
32
+ """
33
+
34
+ # Default max dimension - scale down 4K to ~HD for faster processing
35
+ DEFAULT_MAX_DIMENSION = 1920
36
+
37
+ def __init__(self, file_path: str, max_dimension: int | None = DEFAULT_MAX_DIMENSION):
38
+ """Initialize frame extractor.
39
+
40
+ Args:
41
+ file_path: Path to video or image file
42
+ max_dimension: Maximum width/height. Frames larger than this are scaled down.
43
+ Set to None to disable scaling. Default: 1920 (HD)
44
+ """
45
+ self.video_path = file_path # Keep name for compatibility
46
+ self.max_dimension = max_dimension
47
+ self.cap: cv2.VideoCapture | None = None
48
+ self._duration: float | None = None
49
+ self._fps: float | None = None
50
+ self._frame_count: int | None = None
51
+ self._width: int | None = None
52
+ self._height: int | None = None
53
+ self._use_ffmpeg_fallback = False
54
+ self._use_ffmpeg_decode = False # For high-res, decode at lower res with FFmpeg
55
+ # Image handling
56
+ self._is_image = False
57
+ self._image_frame: np.ndarray | None = None
58
+
59
+ def __enter__(self) -> "FrameExtractor":
60
+ """Open video or image file."""
61
+ # Check if this is an image file
62
+ media_type = get_media_type(self.video_path)
63
+ if media_type == MediaType.IMAGE:
64
+ self._is_image = True
65
+ self._duration = 0.0
66
+ self._fps = 1.0
67
+ self._frame_count = 1
68
+ # Load the image directly
69
+ self._image_frame = cv2.imread(self.video_path)
70
+ if self._image_frame is None:
71
+ logger.warning(f"Failed to load image: {self.video_path}")
72
+ else:
73
+ # Apply scaling
74
+ self._image_frame = self._scale_frame(self._image_frame)
75
+ logger.debug(f"Loaded image directly: {self.video_path}")
76
+ return self
77
+
78
+ # Video file - use VideoCapture
79
+ self.cap = cv2.VideoCapture(self.video_path)
80
+
81
+ if not self.cap.isOpened():
82
+ logger.warning(f"OpenCV failed to open {self.video_path}, using ffmpeg fallback")
83
+ self._use_ffmpeg_fallback = True
84
+ self.cap = None
85
+ else:
86
+ self._fps = self.cap.get(cv2.CAP_PROP_FPS)
87
+ self._frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
88
+ self._width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
89
+ self._height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
90
+
91
+ if self._fps > 0 and self._frame_count > 0:
92
+ self._duration = self._frame_count / self._fps
93
+ else:
94
+ # Seek to estimate duration
95
+ self._duration = self._get_duration_ffprobe()
96
+
97
+ # Check if this is high-res video that needs FFmpeg decode
98
+ if self._width and self._height and self.max_dimension:
99
+ pixels = self._width * self._height
100
+ max_dim = max(self._width, self._height)
101
+ if pixels > HIGH_RES_THRESHOLD and max_dim > self.max_dimension:
102
+ logger.info(f"High-res video ({self._width}x{self._height}), " f"using FFmpeg decode at {self.max_dimension}px")
103
+ self._use_ffmpeg_decode = True
104
+ # Release opencv capture - we'll use FFmpeg instead
105
+ self.cap.release()
106
+ self.cap = None
107
+
108
+ return self
109
+
110
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None: # noqa: ANN001
111
+ """Release video file."""
112
+ if self.cap is not None:
113
+ self.cap.release()
114
+ self.cap = None
115
+ # Clear image reference
116
+ self._image_frame = None
117
+
118
+ @property
119
+ def is_image(self) -> bool:
120
+ """Check if this extractor is handling an image (not a video)."""
121
+ return self._is_image
122
+
123
+ @property
124
+ def duration(self) -> float:
125
+ """Get video duration in seconds (0 for images)."""
126
+ if self._duration is None:
127
+ self._duration = self._get_duration_ffprobe()
128
+ return self._duration
129
+
130
+ @property
131
+ def fps(self) -> float:
132
+ """Get video frame rate (1 for images)."""
133
+ if self._fps is None:
134
+ self._fps = 30.0 # Default fallback
135
+ return self._fps
136
+
137
+ def _get_duration_ffprobe(self) -> float:
138
+ """Get duration using ffprobe."""
139
+ try:
140
+ cmd = [
141
+ "ffprobe",
142
+ "-v",
143
+ "quiet",
144
+ "-show_entries",
145
+ "format=duration",
146
+ "-of",
147
+ "default=noprint_wrappers=1:nokey=1",
148
+ self.video_path,
149
+ ]
150
+ result = subprocess.run(cmd, capture_output=True, text=True, check=True)
151
+ return float(result.stdout.strip())
152
+ except (subprocess.CalledProcessError, ValueError):
153
+ return 0.0
154
+
155
+ def _scale_frame(self, frame: np.ndarray) -> np.ndarray:
156
+ """Scale down frame if larger than max_dimension.
157
+
158
+ Maintains aspect ratio. Only scales down, never up.
159
+ """
160
+ if self.max_dimension is None:
161
+ return frame
162
+
163
+ h, w = frame.shape[:2]
164
+ max_dim = max(h, w)
165
+
166
+ if max_dim <= self.max_dimension:
167
+ return frame
168
+
169
+ # Calculate scale factor
170
+ scale = self.max_dimension / max_dim
171
+ new_w = int(w * scale)
172
+ new_h = int(h * scale)
173
+
174
+ # Use INTER_AREA for downscaling (best quality)
175
+ return cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)
176
+
177
+ def get_frame_at(self, timestamp: float) -> np.ndarray | None:
178
+ """Extract a single frame at the given timestamp.
179
+
180
+ Args:
181
+ timestamp: Time in seconds (ignored for images)
182
+
183
+ Returns:
184
+ Frame as BGR numpy array (scaled to max_dimension), or None if extraction failed
185
+ """
186
+ # For images, always return the loaded image (timestamp is ignored)
187
+ if self._is_image:
188
+ return self._image_frame
189
+
190
+ # High-res video: use FFmpeg with scale filter (decodes at target res)
191
+ if self._use_ffmpeg_decode:
192
+ return self._get_frame_ffmpeg_scaled(timestamp)
193
+
194
+ if self._use_ffmpeg_fallback:
195
+ frame = self._get_frame_ffmpeg(timestamp)
196
+ return self._scale_frame(frame) if frame is not None else None
197
+
198
+ if self.cap is None:
199
+ return None
200
+
201
+ # Seek to timestamp
202
+ self.cap.set(cv2.CAP_PROP_POS_MSEC, timestamp * 1000)
203
+
204
+ ret, frame = self.cap.read()
205
+ if not ret:
206
+ # Try ffmpeg fallback for this frame
207
+ frame = self._get_frame_ffmpeg(timestamp)
208
+ return self._scale_frame(frame) if frame is not None else None
209
+
210
+ return self._scale_frame(frame)
211
+
212
+ def get_frames_at(self, timestamps: list[float]) -> list[tuple[float, np.ndarray | None]]:
213
+ """Extract multiple frames at given timestamps.
214
+
215
+ More efficient than calling get_frame_at repeatedly as it
216
+ processes timestamps in order to minimize seeking.
217
+
218
+ Args:
219
+ timestamps: List of times in seconds
220
+
221
+ Returns:
222
+ List of (timestamp, frame) tuples
223
+ """
224
+ # Sort timestamps for efficient sequential access
225
+ sorted_ts = sorted(set(timestamps))
226
+ results: dict[float, np.ndarray | None] = {}
227
+
228
+ for ts in sorted_ts:
229
+ results[ts] = self.get_frame_at(ts)
230
+
231
+ # Return in original order
232
+ return [(ts, results.get(ts)) for ts in timestamps]
233
+
234
+ def _get_frame_ffmpeg(self, timestamp: float) -> np.ndarray | None:
235
+ """Extract frame using ffmpeg (fallback, no scaling)."""
236
+ with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
237
+ tmp_path = tmp.name
238
+
239
+ try:
240
+ cmd = [
241
+ "ffmpeg",
242
+ "-y",
243
+ "-ss",
244
+ str(timestamp),
245
+ "-i",
246
+ self.video_path,
247
+ "-frames:v",
248
+ "1",
249
+ "-update",
250
+ "1", # Required for ffmpeg 8.x single-image output
251
+ "-q:v",
252
+ "2",
253
+ tmp_path,
254
+ ]
255
+ subprocess.run(cmd, capture_output=True, check=True)
256
+
257
+ if os.path.exists(tmp_path):
258
+ frame = cv2.imread(tmp_path)
259
+ return frame
260
+ except subprocess.CalledProcessError:
261
+ pass
262
+ finally:
263
+ if os.path.exists(tmp_path):
264
+ os.unlink(tmp_path)
265
+
266
+ return None
267
+
268
+ def _get_frame_ffmpeg_scaled(self, timestamp: float) -> np.ndarray | None:
269
+ """Extract frame using FFmpeg with scale filter (for high-res videos).
270
+
271
+ This is faster than decoding at full resolution and then scaling with cv2.
272
+ """
273
+ if self.max_dimension is None:
274
+ return self._get_frame_ffmpeg(timestamp)
275
+
276
+ with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
277
+ tmp_path = tmp.name
278
+
279
+ try:
280
+ # Scale filter that maintains aspect ratio
281
+ # scale=W:H:force_original_aspect_ratio=decrease
282
+ scale_filter = f"scale={self.max_dimension}:{self.max_dimension}" f":force_original_aspect_ratio=decrease"
283
+
284
+ cmd = [
285
+ "ffmpeg",
286
+ "-y",
287
+ "-ss",
288
+ str(timestamp),
289
+ "-i",
290
+ self.video_path,
291
+ "-vf",
292
+ scale_filter,
293
+ "-frames:v",
294
+ "1",
295
+ "-update",
296
+ "1",
297
+ "-q:v",
298
+ "2",
299
+ tmp_path,
300
+ ]
301
+ subprocess.run(cmd, capture_output=True, check=True)
302
+
303
+ if os.path.exists(tmp_path):
304
+ frame = cv2.imread(tmp_path)
305
+ return frame
306
+ except subprocess.CalledProcessError:
307
+ pass
308
+ finally:
309
+ if os.path.exists(tmp_path):
310
+ os.unlink(tmp_path)
311
+
312
+ return None
313
+
314
+ def save_frame(self, frame: np.ndarray, output_path: str, quality: int = 95) -> bool:
315
+ """Save frame to file.
316
+
317
+ Args:
318
+ frame: BGR numpy array
319
+ output_path: Output file path
320
+ quality: JPEG quality (0-100)
321
+
322
+ Returns:
323
+ True if saved successfully
324
+ """
325
+ try:
326
+ cv2.imwrite(output_path, frame, [cv2.IMWRITE_JPEG_QUALITY, quality])
327
+ return os.path.exists(output_path)
328
+ except Exception as e:
329
+ logger.warning(f"Failed to save frame to {output_path}: {e}")
330
+ return False
331
+
332
+
333
def extract_frames_batch(
    video_path: str,
    timestamps: list[float],
    output_dir: str | None = None,
) -> list[tuple[float, np.ndarray | None]]:
    """Grab several frames from one file in a single call.

    Opens a FrameExtractor for the duration of the call, so callers do not
    need to manage the context manager themselves.

    Args:
        video_path: Path to video file
        timestamps: Timestamps in seconds
        output_dir: When given, the directory is created if needed and every
            successfully extracted frame is also written there as
            ``frame_<timestamp>.jpg`` (timestamp with three decimals)

    Returns:
        List of (timestamp, frame) tuples; frame is None where extraction failed
    """
    with FrameExtractor(video_path) as extractor:
        results = extractor.get_frames_at(timestamps)

        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
            # Only frames that were actually decoded are written to disk.
            decoded = [(ts, frame) for ts, frame in results if frame is not None]
            for ts, frame in decoded:
                target = os.path.join(output_dir, f"frame_{ts:.3f}.jpg")
                extractor.save_frame(frame, target)

    return results
361
+
362
+
363
def get_video_duration(file_path: str) -> float:
    """Get video duration in seconds.

    Tries OpenCV first (frame count / fps) and falls back to ffprobe when
    OpenCV cannot open the file or reports unusable values.

    Args:
        file_path: Path to video or image file

    Returns:
        Duration in seconds, or 0 for images/unknown files (including when
        ffprobe fails or is not installed)
    """
    # Check if this is an image - images have 0 duration
    if get_media_type(file_path) == MediaType.IMAGE:
        return 0.0

    # Try OpenCV first (faster)
    cap = cv2.VideoCapture(file_path)
    try:
        if cap.isOpened():
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
            if fps > 0 and frame_count > 0:
                return frame_count / fps
    finally:
        # Release on every path, including when the capture failed to open.
        cap.release()

    # Fallback to ffprobe
    cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-show_entries",
        "format=duration",
        "-of",
        "default=noprint_wrappers=1:nokey=1",
        file_path,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return float(result.stdout.strip())
    except (subprocess.CalledProcessError, ValueError, OSError):
        # OSError covers a missing/non-executable ffprobe binary.
        return 0.0
@@ -0,0 +1,127 @@
1
+ """Modular metadata extraction.
2
+
3
+ This package provides manufacturer-specific metadata extractors.
4
+ Each manufacturer module registers itself on import.
5
+
6
+ Usage:
7
+ from media_engine.extractors.metadata import extract_metadata
8
+
9
+ metadata = extract_metadata("/path/to/video.mp4")
10
+
11
+ To add a new manufacturer:
12
+ 1. Create a new module (e.g., panasonic.py)
13
+ 2. Implement a class with detect() and extract() methods
14
+ 3. Register it using: register_extractor("panasonic", PanasonicExtractor())
15
+ 4. Import the module below to trigger registration
16
+
17
+ The order of imports determines detection priority.
18
+ Specific manufacturers should be imported before generic.
19
+ """
20
+
21
+ import logging
22
+ from pathlib import Path
23
+
24
+ from media_engine.schemas import Metadata
25
+
26
+ # Import manufacturer modules to trigger registration
27
+ # Order matters: more specific extractors first
28
+ from . import (
29
+ apple, # noqa: F401
30
+ arri, # noqa: F401
31
+ blackmagic, # noqa: F401
32
+ camera_360, # noqa: F401 - Insta360, QooCam, GoPro MAX, etc.
33
+ canon, # noqa: F401
34
+ dji, # noqa: F401
35
+ dv, # noqa: F401 - DV/HDV tape formats
36
+ ffmpeg, # noqa: F401
37
+ gopro, # noqa: F401
38
+ red, # noqa: F401
39
+ sony, # noqa: F401
40
+ tesla, # noqa: F401
41
+ )
42
+ from .base import (
43
+ FFPROBE_WORKERS,
44
+ build_base_metadata,
45
+ extract_keyframes,
46
+ get_duration_fast,
47
+ run_ffprobe,
48
+ run_ffprobe_batch,
49
+ shutdown_ffprobe_pool,
50
+ )
51
+
52
+ # Import and register generic fallback LAST
53
+ from .generic import GenericExtractor
54
+ from .registry import get_extractor, list_extractors, register_extractor
55
+
56
+ register_extractor("generic", GenericExtractor())
57
+
58
+ logger = logging.getLogger(__name__)
59
+
60
+ __all__ = [
61
+ "extract_metadata",
62
+ "get_duration_fast",
63
+ "run_ffprobe_batch",
64
+ "list_extractors",
65
+ "FFPROBE_WORKERS",
66
+ "shutdown_ffprobe_pool",
67
+ ]
68
+
69
+
70
def extract_metadata(file_path: str, probe_data: dict | None = None) -> Metadata:
    """Build a Metadata object for a media file.

    Steps:
        1. Obtain ffprobe data (use ``probe_data`` if supplied, otherwise run
           ffprobe; ``.R3D`` files get a minimal placeholder instead)
        2. Build device-agnostic base metadata
        3. Let the matching registered extractor enhance it
        4. Attach keyframes, if any were found

    Args:
        file_path: Path to video file
        probe_data: Optional pre-fetched ffprobe data (for batch processing)

    Returns:
        Metadata object with video information

    Raises:
        FileNotFoundError: If ``file_path`` does not exist
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"Video file not found: {file_path}")

    # Formats ffprobe cannot read (e.g., RED R3D) require direct header parsing
    ffprobe_unsupported = path.suffix.upper() in (".R3D",)

    if probe_data is None and ffprobe_unsupported:
        # Minimal stand-in so downstream code still receives the usual keys
        probe_data = {"streams": [], "format": {"filename": file_path}}
        logger.info(f"Skipping ffprobe for unsupported format: {path.suffix}")
    elif probe_data is None:
        probe_data = run_ffprobe(file_path)

    # Device-agnostic metadata; also the result of last resort
    base_metadata = build_base_metadata(probe_data, file_path)

    match = get_extractor(probe_data, file_path)
    if not match:
        # This shouldn't happen since generic always matches
        logger.warning(f"No extractor matched for {path.name}")
        result = base_metadata
    else:
        name, extractor = match
        logger.info(f"Using {name} extractor for {path.name}")
        try:
            result = extractor.extract(probe_data, file_path, base_metadata)
        except Exception as e:
            # A failing extractor degrades to base metadata
            logger.warning(f"Extractor {name} failed: {e}, using base metadata")
            result = base_metadata

    # Keyframes are attached last so they survive extractors that return a
    # freshly constructed Metadata object.
    keyframes = extract_keyframes(file_path)
    if keyframes:
        result.keyframes = keyframes

    return result
@@ -0,0 +1,169 @@
1
+ """Apple metadata extraction.
2
+
3
+ Handles Apple devices:
4
+ - iPhone (all models)
5
+ - iPad (all models)
6
+ - Mac (FaceTime camera, etc.)
7
+
8
+ Detection methods:
9
+ - make tag: "Apple"
10
+ - com.apple.quicktime.make tag
11
+ - Model contains "iPhone" or "iPad"
12
+
13
+ Apple QuickTime metadata tags:
14
+ - com.apple.quicktime.make
15
+ - com.apple.quicktime.model
16
+ - com.apple.quicktime.software
17
+ - com.apple.quicktime.creationdate
18
+ - com.apple.quicktime.location.iso6709
19
+ """
20
+
21
+ import logging
22
+ import re
23
+ from typing import Any
24
+
25
+ from media_engine.schemas import (
26
+ GPS,
27
+ DetectionMethod,
28
+ DeviceInfo,
29
+ MediaDeviceType,
30
+ Metadata,
31
+ )
32
+
33
+ from .registry import get_tags_lower, register_extractor
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
def _parse_apple_location(location: str) -> GPS | None:
    """Turn an Apple ISO 6709 location string into a GPS value.

    Example input: +59.7441+010.2045+125.0/

    The first two signed numbers are taken as latitude and longitude; a third,
    when present, is taken as altitude. Returns None when fewer than two
    numbers are found or a value cannot be converted.
    """
    signed_numbers = re.findall(r"([+-]\d+\.?\d*)", location)
    if len(signed_numbers) < 2:
        return None

    has_altitude = len(signed_numbers) >= 3
    try:
        lat = float(signed_numbers[0])
        lon = float(signed_numbers[1])
        alt = float(signed_numbers[2]) if has_altitude else None
        return GPS(latitude=lat, longitude=lon, altitude=alt)
    except ValueError:
        return None
57
+
58
+
59
def _determine_device_type(model: str | None) -> MediaDeviceType:
    """Classify an Apple device from its model string.

    iPhone and iPad models, unknown models and a missing model all map to
    PHONE (tablets are treated like phones). Only a model mentioning "Mac"
    (which also covers iMac and MacBook) maps to CAMERA, since those are
    webcam recordings.
    """
    if not model:
        return MediaDeviceType.PHONE

    upper = model.upper()

    # Handheld names take precedence over the Mac check.
    if "IPHONE" in upper or "IPAD" in upper:
        return MediaDeviceType.PHONE

    # "IMAC" and "MACBOOK" both contain "MAC", so one substring test suffices.
    if "MAC" in upper:
        return MediaDeviceType.CAMERA

    return MediaDeviceType.PHONE
74
+
75
+
76
+ def _clean_model_name(model: str | None) -> str | None:
77
+ """Clean up Apple model name for display.
78
+
79
+ Examples:
80
+ "iPhone 15 Pro Max" -> "iPhone 15 Pro Max"
81
+ "iPhone15,3" -> "iPhone 15 Pro Max" (if we had a lookup table)
82
+ """
83
+ if not model:
84
+ return None
85
+
86
+ # Apple sometimes uses internal model identifiers like "iPhone15,3"
87
+ # For now, just return as-is. A future enhancement could add a lookup table.
88
+ return model
89
+
90
+
91
class AppleExtractor:
    """Metadata extractor for Apple devices."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Report whether the probe tags point to an Apple device.

        A file matches when any of these holds:
        - the make tag (standard or QuickTime) contains "Apple"
        - the model tag (standard or QuickTime) contains "iPhone" or "iPad"
        - a QuickTime creation date is present together with a QuickTime
          make or model tag
        """
        tags = get_tags_lower(probe_data)
        qt_make = tags.get("com.apple.quicktime.make")
        qt_model = tags.get("com.apple.quicktime.model")

        # Make tag
        vendor = tags.get("make") or qt_make
        if vendor and "APPLE" in vendor.upper():
            return True

        # Model naming an iPhone/iPad
        product = tags.get("model") or qt_model
        if product:
            product_upper = product.upper()
            if any(marker in product_upper for marker in ("IPHONE", "IPAD")):
                return True

        # Apple QuickTime-specific tags
        has_creation_date = bool(tags.get("com.apple.quicktime.creationdate"))
        return has_creation_date and bool(qt_make or qt_model)

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Return Metadata with Apple device info and GPS layered over the base.

        Device make/model/software prefer the QuickTime tags over the standard
        ones (make defaults to "Apple"). GPS comes from the QuickTime ISO 6709
        location tag when it parses; otherwise the base GPS is kept. The
        remaining fields listed below are copied from ``base_metadata``.
        """
        tags = get_tags_lower(probe_data)

        # QuickTime tags win over the standard ones
        make = tags.get("com.apple.quicktime.make") or tags.get("make") or "Apple"
        raw_model = tags.get("com.apple.quicktime.model") or tags.get("model")
        software = tags.get("com.apple.quicktime.software") or tags.get("software")

        model = _clean_model_name(raw_model)

        device = DeviceInfo(
            make=make,
            model=model,
            software=software,
            type=_determine_device_type(model),
            detection_method=DetectionMethod.METADATA,
            confidence=1.0,
        )

        # Start from the base GPS and override it only with a parsed Apple location
        gps = base_metadata.gps
        iso6709 = tags.get("com.apple.quicktime.location.iso6709")
        if iso6709:
            apple_gps = _parse_apple_location(iso6709)
            if apple_gps:
                gps = apple_gps

        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=base_metadata.codec,
            video_codec=base_metadata.video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=base_metadata.file_size,
            timecode=base_metadata.timecode,
            created_at=base_metadata.created_at,
            device=device,
            gps=gps,
            color_space=base_metadata.color_space,
            lens=base_metadata.lens,
        )
166
+
167
+
168
+ # Register this extractor
169
+ register_extractor("apple", AppleExtractor())