media-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,688 @@
1
+ """Base utilities for metadata extraction."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import re
7
+ import subprocess
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from dataclasses import dataclass
10
+ from datetime import datetime
11
+ from typing import Any
12
+
13
+ from media_engine.schemas import (
14
+ GPS,
15
+ AudioInfo,
16
+ Codec,
17
+ ColorSpace,
18
+ DetectionMethod,
19
+ KeyframeInfo,
20
+ LensInfo,
21
+ Metadata,
22
+ Resolution,
23
+ Stereo3D,
24
+ Stereo3DMode,
25
+ VideoCodec,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Pool for parallel ffprobe calls
31
+ _ffprobe_pool: ThreadPoolExecutor | None = None
32
+
33
+ # Number of workers based on CPU cores (leave 2 cores free, minimum 2 workers)
34
+ FFPROBE_WORKERS = max(2, (os.cpu_count() or 4) - 2)
35
+
36
+ # Timeout for ffprobe calls (seconds)
37
+ FFPROBE_TIMEOUT = 30
38
+
39
+
40
def get_ffprobe_pool() -> ThreadPoolExecutor:
    """Return the shared ffprobe thread pool, creating it on first use.

    Returns:
        The module-wide ThreadPoolExecutor, sized by FFPROBE_WORKERS.
    """
    global _ffprobe_pool

    # Fast path: the pool already exists.
    if _ffprobe_pool is not None:
        return _ffprobe_pool

    _ffprobe_pool = ThreadPoolExecutor(
        thread_name_prefix="ffprobe",
        max_workers=FFPROBE_WORKERS,
    )
    logger.info(f"Created ffprobe pool with {FFPROBE_WORKERS} workers")
    return _ffprobe_pool
47
+
48
+
49
def shutdown_ffprobe_pool() -> None:
    """Tear down the shared ffprobe pool, if one exists (call on app shutdown).

    The executor is shut down with ``wait=False`` and the module-level
    reference is cleared, so a later get_ffprobe_pool() call builds a new one.
    """
    global _ffprobe_pool

    if _ffprobe_pool is None:
        # Nothing was ever created (or it was already shut down).
        return

    _ffprobe_pool.shutdown(wait=False)
    _ffprobe_pool = None
55
+
56
+
57
+ @dataclass
58
+ class GPSCoordinates:
59
+ """Parsed GPS coordinates from ISO 6709 format."""
60
+
61
+ latitude: float
62
+ longitude: float
63
+ altitude: float | None = None
64
+
65
+
66
@dataclass
class SidecarMetadata:
    """Container for values recovered from sidecar files.

    Every field is optional and defaults to None, so an instance can be
    built empty and populated with whatever a given sidecar provides.
    """

    # Device description; typed loosely (annotated in the original as DeviceInfo).
    device: Any | None = None
    # Single GPS position.
    gps: GPS | None = None
    # GPS track; typed loosely (annotated in the original as GPSTrack).
    gps_track: Any | None = None
    # Color space description.
    color_space: ColorSpace | None = None
    # Lens description.
    lens: LensInfo | None = None
    # Creation timestamp.
    created_at: datetime | None = None
76
+
77
+
78
def run_ffprobe(file_path: str) -> dict[str, Any]:
    """Run ffprobe on a file and return its parsed JSON output.

    Args:
        file_path: Path to the media file

    Returns:
        The decoded JSON document printed by ffprobe (format and streams).

    Raises:
        RuntimeError: If ffprobe times out, exits with a non-zero status, or
            prints output that is not valid JSON. The underlying exception is
            attached as ``__cause__``.
    """
    # Note: -select_streams with comma syntax (v:0,a:0) doesn't work reliably
    # across ffprobe versions, so we get all streams and filter in code
    cmd = [
        "ffprobe",
        "-v",
        "error",  # Show errors (quiet suppresses them, hiding why probes fail)
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        file_path,
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=FFPROBE_TIMEOUT)
        return json.loads(result.stdout)
    except subprocess.TimeoutExpired as exc:
        logger.error(f"ffprobe timed out after {FFPROBE_TIMEOUT}s for {file_path}")
        raise RuntimeError(f"ffprobe timed out for {file_path}") from exc
    except subprocess.CalledProcessError as exc:
        logger.error(f"ffprobe failed: {exc.stderr}")
        raise RuntimeError(f"ffprobe failed for {file_path}: {exc.stderr}") from exc
    except json.JSONDecodeError as exc:
        logger.error(f"Failed to parse ffprobe output: {exc}")
        raise RuntimeError(f"Failed to parse ffprobe output: {exc}") from exc
111
+
112
+
113
def get_video_info(file_path: str) -> tuple[float, float, int, int]:
    """Get basic video info using ffprobe.

    This is a lightweight probe that only fetches video stream info,
    useful for frame extraction where full probe data isn't needed.

    Handles edge cases like files with multiple video streams (AVCHD)
    that cause ffprobe to output multiple lines, and fields that ffprobe
    reports as ``N/A`` (treated as missing rather than raising).

    Args:
        file_path: Path to the video file

    Returns:
        Tuple of (fps, duration, width, height). Missing or unparsable
        fields fall back to 30.0 fps, 0 duration, 1920 width, 1080 height.
    """
    probe_timeout = 30  # seconds, applied to each ffprobe invocation

    stream_cmd = [
        "ffprobe",
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-show_entries",
        "stream=width,height,r_frame_rate,duration",
        "-of",
        "csv=p=0",
        file_path,
    ]
    stream_result = subprocess.run(stream_cmd, capture_output=True, text=True, timeout=probe_timeout)

    # Take only first line (some files have multiple video streams)
    fields = stream_result.stdout.strip().split("\n")[0].split(",")
    # Pad so every expected column exists; empty means "use the default".
    fields += [""] * (4 - len(fields))

    # Output format: width,height,fps,duration
    width = _parse_int_field(fields[0], 1920)
    height = _parse_int_field(fields[1], 1080)
    fps = _parse_frame_rate(fields[2], 30.0)
    duration = _parse_float_field(fields[3], 0.0)

    if duration <= 0:
        # Stream duration is absent or N/A: ask the container instead.
        format_cmd = [
            "ffprobe",
            "-v",
            "error",
            "-show_entries",
            "format=duration",
            "-of",
            "csv=p=0",
            file_path,
        ]
        format_result = subprocess.run(format_cmd, capture_output=True, text=True, timeout=probe_timeout)
        duration = _parse_float_field(format_result.stdout.strip().split("\n")[0], 0.0)

    return fps, duration, width, height


def _parse_int_field(text: str, default: int) -> int:
    """Return ``text`` as an int, or ``default`` when empty or not an integer."""
    try:
        return int(text)
    except ValueError:
        return default


def _parse_float_field(text: str, default: float) -> float:
    """Return ``text`` as a float, or ``default`` when empty or not numeric (e.g. N/A)."""
    try:
        return float(text)
    except ValueError:
        return default


def _parse_frame_rate(text: str, default: float) -> float:
    """Parse a frame rate given as "num/den" or a plain number, else ``default``."""
    if not text:
        return default
    try:
        if "/" in text:
            num, den = text.split("/")
            denominator = float(den)
            # A zero denominator ("0/0") means the rate is unknown.
            return float(num) / denominator if denominator > 0 else default
        return float(text)
    except ValueError:
        return default
178
+
179
+
180
def get_duration_fast(file_path: str) -> float | None:
    """Return only the duration of a video file.

    Thin convenience layer over get_video_info for callers that need
    nothing but the length (e.g., for ETA predictions). Any failure while
    probing is swallowed and reported as "unknown".

    Args:
        file_path: Path to the video file

    Returns:
        Duration in seconds, or None if it couldn't be determined
    """
    try:
        _fps, seconds, _width, _height = get_video_info(file_path)
        if seconds > 0:
            return seconds
        return None
    except Exception:
        # Best-effort helper: callers treat None as "no duration available".
        return None
197
+
198
+
199
+ def run_ffprobe_batch(
200
+ file_paths: list[str],
201
+ ) -> dict[str, dict[str, Any] | Exception]:
202
+ """Run ffprobe on multiple files in parallel.
203
+
204
+ Args:
205
+ file_paths: List of file paths to probe
206
+
207
+ Returns:
208
+ Dict mapping file path to probe result or Exception if failed
209
+ """
210
+ if not file_paths:
211
+ return {}
212
+
213
+ pool = get_ffprobe_pool()
214
+ futures = {pool.submit(run_ffprobe, path): path for path in file_paths}
215
+
216
+ results: dict[str, dict[str, Any] | Exception] = {}
217
+ for future in as_completed(futures):
218
+ path = futures[future]
219
+ try:
220
+ results[path] = future.result()
221
+ except Exception as e:
222
+ logger.warning(f"ffprobe failed for {path}: {e}")
223
+ results[path] = e
224
+
225
+ return results
226
+
227
+
228
def extract_keyframes(file_path: str, timeout: int = 60) -> KeyframeInfo | None:
    """Extract keyframe (I-frame) timestamps from video.

    Uses ffprobe with -skip_frame nokey for fast keyframe-only extraction.

    Args:
        file_path: Path to video file
        timeout: Timeout in seconds (keyframe extraction can be slow for long videos)

    Returns:
        KeyframeInfo with timestamps and analysis, or None if extraction fails
        or no timestamp could be parsed.
    """
    cmd = [
        "ffprobe",
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-skip_frame",
        "nokey",
        "-show_entries",
        "frame=pts_time",
        "-of",
        "csv=p=0",
        file_path,
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=timeout)

        # Parse timestamps from output (one per line)
        timestamps: list[float] = []
        for line in result.stdout.strip().split("\n"):
            # Only the first CSV field is the timestamp; a line may carry
            # extra (possibly empty) trailing fields, which would otherwise
            # make float() fail and the keyframe be dropped.
            field = line.split(",", 1)[0].strip()
            if not field:
                continue
            try:
                timestamps.append(float(field))
            except ValueError:
                # Non-numeric value such as "N/A": skip this frame.
                continue

        if not timestamps:
            return None

        # Analyze interval pattern
        is_fixed, avg_interval = _analyze_keyframe_intervals(timestamps)

        return KeyframeInfo(
            timestamps=timestamps,
            count=len(timestamps),
            is_fixed_interval=is_fixed,
            avg_interval=avg_interval,
        )

    except subprocess.TimeoutExpired:
        logger.warning(f"Keyframe extraction timed out for {file_path}")
        return None
    except subprocess.CalledProcessError as e:
        logger.warning(f"Keyframe extraction failed for {file_path}: {e.stderr}")
        return None
    except Exception as e:
        # Deliberately best-effort: keyframe data is optional.
        logger.warning(f"Keyframe extraction error for {file_path}: {e}")
        return None
290
+
291
+
292
+ def _analyze_keyframe_intervals(timestamps: list[float]) -> tuple[bool, float | None]:
293
+ """Analyze keyframe intervals to detect fixed GOP vs scene cuts.
294
+
295
+ Args:
296
+ timestamps: List of keyframe timestamps in seconds
297
+
298
+ Returns:
299
+ Tuple of (is_fixed_interval, average_interval)
300
+ is_fixed_interval is True if keyframes appear at regular intervals (GOP)
301
+ """
302
+ if len(timestamps) < 2:
303
+ return False, None
304
+
305
+ # Calculate intervals between keyframes
306
+ intervals = [timestamps[i + 1] - timestamps[i] for i in range(len(timestamps) - 1)]
307
+
308
+ avg_interval = sum(intervals) / len(intervals)
309
+
310
+ if avg_interval == 0:
311
+ return False, 0.0
312
+
313
+ # Check if intervals are consistent (within 20% of average)
314
+ # Fixed GOP will have very consistent intervals
315
+ # Scene cuts will have irregular intervals
316
+ variance_threshold = 0.2 # 20% variance allowed for "fixed"
317
+ consistent_count = sum(1 for interval in intervals if abs(interval - avg_interval) / avg_interval < variance_threshold)
318
+
319
+ # If 80%+ of intervals are consistent, consider it fixed GOP
320
+ is_fixed = consistent_count / len(intervals) >= 0.8
321
+
322
+ return is_fixed, round(avg_interval, 3)
323
+
324
+
325
+ def parse_fps(video_stream: dict[str, Any]) -> float | None:
326
+ """Parse frame rate from video stream."""
327
+ # Try avg_frame_rate first
328
+ fps_str = video_stream.get("avg_frame_rate", "")
329
+ if fps_str and "/" in fps_str:
330
+ num, den = fps_str.split("/")
331
+ if int(den) != 0:
332
+ return round(int(num) / int(den), 2)
333
+
334
+ # Fall back to r_frame_rate
335
+ fps_str = video_stream.get("r_frame_rate", "")
336
+ if fps_str and "/" in fps_str:
337
+ num, den = fps_str.split("/")
338
+ if int(den) != 0:
339
+ return round(int(num) / int(den), 2)
340
+
341
+ return None
342
+
343
+
344
+ def parse_bit_depth(video_stream: dict[str, Any]) -> int | None:
345
+ """Parse bit depth from video stream."""
346
+ # Try bits_per_raw_sample first
347
+ bits = video_stream.get("bits_per_raw_sample")
348
+ if bits:
349
+ try:
350
+ return int(bits)
351
+ except ValueError:
352
+ pass
353
+
354
+ # Parse from pixel format (e.g., yuv420p10le, yuv422p10be)
355
+ pix_fmt = video_stream.get("pix_fmt", "")
356
+ if pix_fmt:
357
+ match = re.search(r"(\d+)(le|be)?$", pix_fmt)
358
+ if match:
359
+ depth = int(match.group(1))
360
+ if depth in (10, 12, 16):
361
+ return depth
362
+ if pix_fmt in ("yuv420p", "yuv422p", "yuv444p", "yuvj420p", "yuvj422p"):
363
+ return 8
364
+
365
+ return None
366
+
367
+
368
+ def extract_timecode(tags: dict[str, str], video_stream: dict[str, Any] | None) -> str | None:
369
+ """Extract start timecode from metadata."""
370
+ tags_lower = {k.lower(): v for k, v in tags.items()}
371
+
372
+ tc = tags_lower.get("timecode")
373
+ if tc:
374
+ return tc
375
+
376
+ if video_stream:
377
+ stream_tags = video_stream.get("tags", {})
378
+ stream_tags_lower = {k.lower(): v for k, v in stream_tags.items()}
379
+ tc = stream_tags_lower.get("timecode")
380
+ if tc:
381
+ return tc
382
+
383
+ return None
384
+
385
+
386
+ def parse_creation_time(tags: dict[str, str], stream_tags: dict[str, str] | None = None) -> datetime | None:
387
+ """Parse creation time from metadata tags.
388
+
389
+ Checks format-level tags first, then stream tags as fallback.
390
+ Normalizes keys to lowercase for case-insensitive lookup.
391
+ """
392
+ # Normalize keys to lowercase for case-insensitive lookup
393
+ tags_lower = {k.lower(): v for k, v in tags.items()}
394
+
395
+ time_str = (
396
+ tags_lower.get("creation_time")
397
+ or tags_lower.get("date")
398
+ or tags_lower.get("com.apple.quicktime.creationdate")
399
+ or tags_lower.get("date_recorded")
400
+ or tags_lower.get("date-eng") # Some MKV files
401
+ or tags_lower.get("modification_date") # Canon MXF files
402
+ )
403
+
404
+ # Fallback to stream tags if format tags don't have the date
405
+ if not time_str and stream_tags:
406
+ stream_tags_lower = {k.lower(): v for k, v in stream_tags.items()}
407
+ time_str = stream_tags_lower.get("creation_time") or stream_tags_lower.get("date")
408
+
409
+ if not time_str:
410
+ return None
411
+
412
+ # Handle timezone suffixes by stripping them for parsing
413
+ # ffprobe can return: "2024-06-15T10:30:00.000000Z"
414
+ # or "2024-06-15 10:30:00+0200"
415
+ # or "2024-06-15T10:30:00+02:00"
416
+ time_str_clean = time_str.strip()
417
+
418
+ # Remove timezone offset for parsing (we'll treat as UTC if present)
419
+ # Patterns like +0200, +02:00, -0500, -05:00
420
+ tz_pattern = r"[+-]\d{2}:?\d{2}$"
421
+ time_str_no_tz = re.sub(tz_pattern, "", time_str_clean)
422
+
423
+ formats = [
424
+ ("%Y-%m-%dT%H:%M:%S.%f", None), # Variable microseconds
425
+ ("%Y-%m-%dT%H:%M:%S", 19),
426
+ ("%Y-%m-%d %H:%M:%S", 19),
427
+ ("%Y:%m:%d %H:%M:%S", 19), # EXIF format
428
+ ("%Y/%m/%d %H:%M:%S", 19),
429
+ ("%d/%m/%Y %H:%M:%S", 19), # European format
430
+ ("%Y-%m-%d", 10), # Date only
431
+ ]
432
+
433
+ for fmt, length in formats:
434
+ try:
435
+ if length:
436
+ return datetime.strptime(time_str_no_tz[:length], fmt)
437
+ else:
438
+ # Variable length (for microseconds)
439
+ # Find the 'T' and parse accordingly
440
+ if "T" in time_str_no_tz:
441
+ return datetime.strptime(time_str_no_tz.rstrip("Z"), fmt)
442
+ except ValueError:
443
+ continue
444
+
445
+ logger.warning(f"Could not parse creation time: {time_str}")
446
+ return None
447
+
448
+
449
def parse_iso6709(location: str) -> GPSCoordinates | None:
    """Parse ISO 6709 format GPS coordinates given in signed decimal degrees.

    The first two signed numbers in the string are taken as latitude and
    longitude, and a third, if present, as altitude (e.g.
    "+63.1108+010.3968+012.000/"). Values are interpreted as decimal
    degrees only; a pair outside the valid latitude/longitude range (as
    happens with sexagesimal DDMM/DDMMSS strings or unrelated text) is
    rejected instead of being returned as a bogus position.

    Args:
        location: Raw location string from a metadata tag

    Returns:
        GPSCoordinates, or None if fewer than two signed numbers are found
        or the pair is not a valid latitude/longitude.
    """
    # Each match is sign + digits + optional fraction, so float() cannot fail.
    numbers = re.findall(r"([+-]\d+\.?\d*)", location)
    if len(numbers) < 2:
        return None

    latitude = float(numbers[0])
    longitude = float(numbers[1])
    if not (-90.0 <= latitude <= 90.0 and -180.0 <= longitude <= 180.0):
        return None

    altitude = float(numbers[2]) if len(numbers) >= 3 else None
    return GPSCoordinates(latitude=latitude, longitude=longitude, altitude=altitude)
465
+
466
+
467
+ def parse_dms_coordinate(dms: str, ref: str | None) -> float | None:
468
+ """Parse DMS (degrees;minutes;seconds) format to decimal degrees.
469
+
470
+ Handles multiple formats:
471
+ - 63;6;38.880 (all semicolons)
472
+ - 63;6:38.880 (mixed semicolon and colon)
473
+ """
474
+ try:
475
+ # Normalize: replace colons with semicolons
476
+ normalized = dms.replace(":", ";")
477
+ parts = normalized.split(";")
478
+ if len(parts) != 3:
479
+ return float(dms)
480
+
481
+ degrees = float(parts[0])
482
+ minutes = float(parts[1])
483
+ seconds = float(parts[2])
484
+
485
+ decimal = degrees + minutes / 60 + seconds / 3600
486
+
487
+ if ref in ("S", "W"):
488
+ decimal = -decimal
489
+
490
+ return decimal
491
+ except (ValueError, IndexError):
492
+ return None
493
+
494
+
495
def extract_gps_from_tags(tags: dict[str, str]) -> GPS | None:
    """Extract GPS coordinates from metadata tags.

    Tag names are matched case-insensitively. A combined ISO 6709 location
    tag is preferred; separate latitude/longitude tags are used when no
    location tag is present or it cannot be parsed.

    Args:
        tags: Format-level tags

    Returns:
        GPS built from the tags, or None when no usable position is found.
    """
    tags_lower = {k.lower(): v for k, v in tags.items()}

    location = tags_lower.get("location") or tags_lower.get("com.apple.quicktime.location.iso6709") or tags_lower.get("gps")

    if location:
        coords = parse_iso6709(location)
        if coords:
            return GPS(
                latitude=coords.latitude,
                longitude=coords.longitude,
                altitude=coords.altitude,
            )

    lat = tags_lower.get("gps_latitude") or tags_lower.get("location-latitude")
    lon = tags_lower.get("gps_longitude") or tags_lower.get("location-longitude")

    if not (lat and lon):
        return None

    # Altitude is optional: a malformed or empty value must not discard an
    # otherwise valid latitude/longitude. As before, 0 is reported as None.
    altitude: float | None = None
    raw_altitude = tags_lower.get("gps_altitude")
    if raw_altitude:
        try:
            altitude = float(raw_altitude) or None
        except (TypeError, ValueError):
            altitude = None

    try:
        latitude = float(lat)
        longitude = float(lon)
        return GPS(
            latitude=latitude,
            longitude=longitude,
            altitude=altitude,
        )
    except ValueError:
        # Non-numeric coordinates (or a ValueError raised while building GPS).
        return None
524
+
525
+
526
def extract_color_space_from_stream(video_stream: dict[str, Any] | None, tags: dict[str, str]) -> ColorSpace | None:
    """Build color space info from the video stream and format tags.

    Transfer, primaries and matrix are read from the stream's
    ``color_transfer``, ``color_primaries`` and ``color_space`` fields. If
    the format tags contain ``com.apple.proapps.customgamma`` (matched
    case-insensitively), the text after its last dot replaces the transfer
    value.

    Args:
        video_stream: ffprobe stream dict for the video stream, or None
        tags: Format-level tags

    Returns:
        ColorSpace marked as detected from metadata, or None when none of
        the three values is available.
    """
    stream = video_stream if video_stream else {}
    transfer: str | None = stream.get("color_transfer")
    primaries: str | None = stream.get("color_primaries")
    matrix: str | None = stream.get("color_space")

    lowered = {name.lower(): value for name, value in tags.items()}
    custom_gamma = lowered.get("com.apple.proapps.customgamma", "")
    if custom_gamma:
        # Keep only the final dot-separated component of the gamma identifier.
        transfer = custom_gamma.split(".")[-1]

    if not transfer and not primaries and not matrix:
        return None

    return ColorSpace(
        transfer=transfer,
        primaries=primaries,
        matrix=matrix,
        detection_method=DetectionMethod.METADATA,
    )
553
+
554
+
555
def detect_stereo_3d(probe_data: dict[str, Any]) -> Stereo3D | None:
    """Detect stereoscopic 3D video format.

    Two checks are made, in order:
    - MVC: the first two video streams are both H.264, the first has a
      positive width and the second a width of 0
    - Metadata tags: a format-level ``stereo_mode`` or ``stereo3d`` tag
      naming a side-by-side or top/bottom layout

    Args:
        probe_data: Parsed ffprobe output

    Returns:
        Stereo3D info if 3D is detected, None otherwise.
    """
    videos = [entry for entry in probe_data.get("streams", []) if entry.get("codec_type") == "video"]

    if len(videos) >= 2:
        base_view, dependent_view = videos[0], videos[1]
        if base_view.get("codec_name") == "h264" and dependent_view.get("codec_name") == "h264":
            base_width = base_view.get("width", 0)
            dependent_width = dependent_view.get("width", 0)

            # MVC dependent view typically has 0x0 dimensions
            if base_width > 0 and dependent_width == 0:
                logger.info("Detected MVC stereoscopic 3D (two H.264 streams)")
                return Stereo3D(
                    mode=Stereo3DMode.MVC,
                    eye_count=2,
                    has_left_eye=True,
                    has_right_eye=True,
                    detection_method=DetectionMethod.METADATA,
                )

    # Check for stereo_mode metadata tag (used by some 360 cameras and encoders)
    container_tags = probe_data.get("format", {}).get("tags", {})
    lowered = {name.lower(): value for name, value in container_tags.items()}

    declared = lowered.get("stereo_mode") or lowered.get("stereo3d")
    if not declared:
        return None

    layout = declared.lower()
    if any(token in layout for token in ("side", "sbs")):
        return Stereo3D(
            mode=Stereo3DMode.SIDE_BY_SIDE,
            detection_method=DetectionMethod.METADATA,
        )
    if any(token in layout for token in ("top", "over", "tab")):
        return Stereo3D(
            mode=Stereo3DMode.TOP_BOTTOM,
            detection_method=DetectionMethod.METADATA,
        )

    return None
610
+
611
+
612
def build_base_metadata(
    probe_data: dict[str, Any],
    file_path: str,
) -> Metadata:
    """Assemble device-independent metadata from ffprobe output.

    Uses the first video stream and the first audio stream found in the
    probe data. Device and lens are left as None.

    Args:
        probe_data: Parsed ffprobe output
        file_path: Path to the probed file (read for its size on disk)

    Returns:
        Metadata populated from the probe data.
    """
    container = probe_data.get("format", {})
    container_tags = container.get("tags", {})
    all_streams = probe_data.get("streams", [])

    # First stream of each kind wins; later ones are ignored.
    primary_video = next((s for s in all_streams if s.get("codec_type") == "video"), None)
    primary_audio = next((s for s in all_streams if s.get("codec_type") == "audio"), None)

    if primary_video:
        frame_size = Resolution(
            width=primary_video.get("width", 0),
            height=primary_video.get("height", 0),
        )
    else:
        frame_size = Resolution(width=0, height=0)

    codec_names = Codec(
        video=primary_video.get("codec_name") if primary_video else None,
        audio=primary_audio.get("codec_name") if primary_audio else None,
    )

    video_details: VideoCodec | None = None
    if primary_video:
        video_details = VideoCodec(
            name=primary_video.get("codec_name", "unknown"),
            profile=primary_video.get("profile"),
            bit_depth=parse_bit_depth(primary_video),
            pixel_format=primary_video.get("pix_fmt"),
        )

    audio_details: AudioInfo | None = None
    if primary_audio:
        # A value of 0 (or a missing field) is reported as None.
        audio_details = AudioInfo(
            codec=primary_audio.get("codec_name"),
            sample_rate=int(primary_audio.get("sample_rate", 0)) or None,
            channels=primary_audio.get("channels"),
            bit_depth=primary_audio.get("bits_per_sample") or primary_audio.get("bits_per_raw_sample"),
            bitrate=int(primary_audio.get("bit_rate", 0)) or None,
        )

    frame_rate = parse_fps(primary_video) if primary_video else None
    length_seconds = float(container.get("duration", 0))
    raw_bitrate = container.get("bit_rate")
    overall_bitrate = int(raw_bitrate) if raw_bitrate else None
    size_on_disk = os.path.getsize(file_path)

    # Stream tags serve as a fallback source for the creation date.
    video_tags = primary_video.get("tags", {}) if primary_video else None
    recorded_at = parse_creation_time(container_tags, video_tags)
    start_timecode = extract_timecode(container_tags, primary_video)
    position = extract_gps_from_tags(container_tags)
    colors = extract_color_space_from_stream(primary_video, container_tags)

    # Detect stereoscopic 3D
    stereo = detect_stereo_3d(probe_data)

    return Metadata(
        duration=length_seconds,
        resolution=frame_size,
        codec=codec_names,
        video_codec=video_details,
        audio=audio_details,
        fps=frame_rate,
        bitrate=overall_bitrate,
        file_size=size_on_disk,
        timecode=start_timecode,
        created_at=recorded_at,
        device=None,
        gps=position,
        color_space=colors,
        lens=None,
        stereo_3d=stereo,
    )