media_engine-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/clip.py +79 -0
- cli/faces.py +91 -0
- cli/metadata.py +68 -0
- cli/motion.py +77 -0
- cli/objects.py +94 -0
- cli/ocr.py +93 -0
- cli/scenes.py +57 -0
- cli/telemetry.py +65 -0
- cli/transcript.py +76 -0
- media_engine/__init__.py +7 -0
- media_engine/_version.py +34 -0
- media_engine/app.py +80 -0
- media_engine/batch/__init__.py +56 -0
- media_engine/batch/models.py +99 -0
- media_engine/batch/processor.py +1131 -0
- media_engine/batch/queue.py +232 -0
- media_engine/batch/state.py +30 -0
- media_engine/batch/timing.py +321 -0
- media_engine/cli.py +17 -0
- media_engine/config.py +674 -0
- media_engine/extractors/__init__.py +75 -0
- media_engine/extractors/clip.py +401 -0
- media_engine/extractors/faces.py +459 -0
- media_engine/extractors/frame_buffer.py +351 -0
- media_engine/extractors/frames.py +402 -0
- media_engine/extractors/metadata/__init__.py +127 -0
- media_engine/extractors/metadata/apple.py +169 -0
- media_engine/extractors/metadata/arri.py +118 -0
- media_engine/extractors/metadata/avchd.py +208 -0
- media_engine/extractors/metadata/avchd_gps.py +270 -0
- media_engine/extractors/metadata/base.py +688 -0
- media_engine/extractors/metadata/blackmagic.py +139 -0
- media_engine/extractors/metadata/camera_360.py +276 -0
- media_engine/extractors/metadata/canon.py +290 -0
- media_engine/extractors/metadata/dji.py +371 -0
- media_engine/extractors/metadata/dv.py +121 -0
- media_engine/extractors/metadata/ffmpeg.py +76 -0
- media_engine/extractors/metadata/generic.py +119 -0
- media_engine/extractors/metadata/gopro.py +256 -0
- media_engine/extractors/metadata/red.py +305 -0
- media_engine/extractors/metadata/registry.py +114 -0
- media_engine/extractors/metadata/sony.py +442 -0
- media_engine/extractors/metadata/tesla.py +157 -0
- media_engine/extractors/motion.py +765 -0
- media_engine/extractors/objects.py +245 -0
- media_engine/extractors/objects_qwen.py +754 -0
- media_engine/extractors/ocr.py +268 -0
- media_engine/extractors/scenes.py +82 -0
- media_engine/extractors/shot_type.py +217 -0
- media_engine/extractors/telemetry.py +262 -0
- media_engine/extractors/transcribe.py +579 -0
- media_engine/extractors/translate.py +121 -0
- media_engine/extractors/vad.py +263 -0
- media_engine/main.py +68 -0
- media_engine/py.typed +0 -0
- media_engine/routers/__init__.py +15 -0
- media_engine/routers/batch.py +78 -0
- media_engine/routers/health.py +93 -0
- media_engine/routers/models.py +211 -0
- media_engine/routers/settings.py +87 -0
- media_engine/routers/utils.py +135 -0
- media_engine/schemas.py +581 -0
- media_engine/utils/__init__.py +5 -0
- media_engine/utils/logging.py +54 -0
- media_engine/utils/memory.py +49 -0
- media_engine-0.1.0.dist-info/METADATA +276 -0
- media_engine-0.1.0.dist-info/RECORD +70 -0
- media_engine-0.1.0.dist-info/WHEEL +4 -0
- media_engine-0.1.0.dist-info/entry_points.txt +11 -0
- media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
media_engine/extractors/metadata/base.py
@@ -0,0 +1,688 @@
"""Base utilities for metadata extraction."""

import json
import logging
import os
import re
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from datetime import datetime
from typing import Any

from media_engine.schemas import (
    GPS,
    AudioInfo,
    Codec,
    ColorSpace,
    DetectionMethod,
    KeyframeInfo,
    LensInfo,
    Metadata,
    Resolution,
    Stereo3D,
    Stereo3DMode,
    VideoCodec,
)

logger = logging.getLogger(__name__)

# Pool for parallel ffprobe calls
_ffprobe_pool: ThreadPoolExecutor | None = None

# Number of workers based on CPU cores (leave 2 cores free, minimum 2 workers)
FFPROBE_WORKERS = max(2, (os.cpu_count() or 4) - 2)

# Timeout for ffprobe calls (seconds)
FFPROBE_TIMEOUT = 30


def get_ffprobe_pool() -> ThreadPoolExecutor:
    """Get or create the ffprobe thread pool."""
    global _ffprobe_pool
    if _ffprobe_pool is None:
        _ffprobe_pool = ThreadPoolExecutor(max_workers=FFPROBE_WORKERS, thread_name_prefix="ffprobe")
        logger.info(f"Created ffprobe pool with {FFPROBE_WORKERS} workers")
    return _ffprobe_pool


def shutdown_ffprobe_pool() -> None:
    """Shutdown the ffprobe pool (call on app shutdown)."""
    global _ffprobe_pool
    if _ffprobe_pool is not None:
        _ffprobe_pool.shutdown(wait=False)
        _ffprobe_pool = None


@dataclass
class GPSCoordinates:
    """Parsed GPS coordinates from ISO 6709 format."""

    latitude: float
    longitude: float
    altitude: float | None = None


@dataclass
class SidecarMetadata:
    """Metadata extracted from sidecar files."""

    device: Any | None = None  # DeviceInfo
    gps: GPS | None = None
    gps_track: Any | None = None  # GPSTrack
    color_space: ColorSpace | None = None
    lens: LensInfo | None = None
    created_at: datetime | None = None


def run_ffprobe(file_path: str) -> dict[str, Any]:
    """Run ffprobe and return parsed JSON output.

    Args:
        file_path: Path to the media file
    """
    cmd = [
        "ffprobe",
        "-v",
        "error",  # Show errors (quiet suppresses them, hiding why probes fail)
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
    ]

    # Note: -select_streams with comma syntax (v:0,a:0) doesn't work reliably
    # across ffprobe versions, so we get all streams and filter in code

    cmd.append(file_path)

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=FFPROBE_TIMEOUT)
        return json.loads(result.stdout)
    except subprocess.TimeoutExpired:
        logger.error(f"ffprobe timed out after {FFPROBE_TIMEOUT}s for {file_path}")
        raise RuntimeError(f"ffprobe timed out for {file_path}")
    except subprocess.CalledProcessError as e:
        logger.error(f"ffprobe failed: {e.stderr}")
        raise RuntimeError(f"ffprobe failed for {file_path}: {e.stderr}")
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse ffprobe output: {e}")
        raise RuntimeError(f"Failed to parse ffprobe output: {e}")


def get_video_info(file_path: str) -> tuple[float, float, int, int]:
    """Get basic video info using ffprobe.

    This is a lightweight probe that only fetches video stream info,
    useful for frame extraction where full probe data isn't needed.

    Handles edge cases like files with multiple video streams (AVCHD)
    that cause ffprobe to output multiple lines.

    Args:
        file_path: Path to the video file

    Returns:
        Tuple of (fps, duration, width, height)
    """
    cmd = [
        "ffprobe",
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-show_entries",
        "stream=width,height,r_frame_rate,duration",
        "-of",
        "csv=p=0",
        file_path,
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)

    # Take only first line (some files have multiple video streams)
    first_line = result.stdout.strip().split("\n")[0]
    parts = first_line.split(",")

    # Output format: width,height,fps,duration
    width = int(parts[0]) if parts and parts[0] else 1920
    height = int(parts[1]) if len(parts) > 1 and parts[1] else 1080

    # Parse frame rate (can be "30/1" or "29.97")
    fps_str = parts[2] if len(parts) > 2 else "30"
    if "/" in fps_str:
        num, den = fps_str.split("/")
        fps = float(num) / float(den) if float(den) > 0 else 30.0
    else:
        fps = float(fps_str) if fps_str else 30.0

    # Duration might be in stream or need to get from format
    duration = float(parts[3]) if len(parts) > 3 and parts[3] else 0

    if duration == 0:
        # Try getting duration from format
        cmd2 = [
            "ffprobe",
            "-v",
            "error",
            "-show_entries",
            "format=duration",
            "-of",
            "csv=p=0",
            file_path,
        ]
        result2 = subprocess.run(cmd2, capture_output=True, text=True, timeout=30)
        first_line2 = result2.stdout.strip().split("\n")[0]
        duration = float(first_line2) if first_line2 else 0

    return fps, duration, width, height


def get_duration_fast(file_path: str) -> float | None:
    """Get just the duration of a video file.

    This is a lightweight wrapper around get_video_info for when
    only duration is needed (e.g., for ETA predictions).

    Args:
        file_path: Path to the video file

    Returns:
        Duration in seconds, or None if couldn't be determined
    """
    try:
        _, duration, _, _ = get_video_info(file_path)
        return duration if duration > 0 else None
    except Exception:
        return None


def run_ffprobe_batch(
    file_paths: list[str],
) -> dict[str, dict[str, Any] | Exception]:
    """Run ffprobe on multiple files in parallel.

    Args:
        file_paths: List of file paths to probe

    Returns:
        Dict mapping file path to probe result or Exception if failed
    """
    if not file_paths:
        return {}

    pool = get_ffprobe_pool()
    futures = {pool.submit(run_ffprobe, path): path for path in file_paths}

    results: dict[str, dict[str, Any] | Exception] = {}
    for future in as_completed(futures):
        path = futures[future]
        try:
            results[path] = future.result()
        except Exception as e:
            logger.warning(f"ffprobe failed for {path}: {e}")
            results[path] = e

    return results

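As a rough usage sketch (not part of the package; the clip paths below are hypothetical), the batch helper above can be driven like this, with failed probes returned as Exception values instead of raising:

# Illustrative only; the file paths are made up.
from media_engine.extractors.metadata.base import run_ffprobe_batch, shutdown_ffprobe_pool

results = run_ffprobe_batch(["clips/a.mp4", "clips/b.mov"])
for path, probe in results.items():
    if isinstance(probe, Exception):
        print(f"{path}: probe failed: {probe}")
    else:
        streams = probe.get("streams", [])
        duration = probe.get("format", {}).get("duration")
        print(f"{path}: {len(streams)} streams, duration={duration}")

shutdown_ffprobe_pool()  # release the shared worker pool, e.g. on app shutdown
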
def extract_keyframes(file_path: str, timeout: int = 60) -> KeyframeInfo | None:
    """Extract keyframe (I-frame) timestamps from video.

    Uses ffprobe with -skip_frame nokey for fast keyframe-only extraction.

    Args:
        file_path: Path to video file
        timeout: Timeout in seconds (keyframe extraction can be slow for long videos)

    Returns:
        KeyframeInfo with timestamps and analysis, or None if extraction fails
    """
    cmd = [
        "ffprobe",
        "-v",
        "error",
        "-select_streams",
        "v:0",
        "-skip_frame",
        "nokey",
        "-show_entries",
        "frame=pts_time",
        "-of",
        "csv=p=0",
        file_path,
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=timeout)

        # Parse timestamps from output (one per line)
        timestamps: list[float] = []
        for line in result.stdout.strip().split("\n"):
            line = line.strip()
            if line:
                try:
                    timestamps.append(float(line))
                except ValueError:
                    continue

        if not timestamps:
            return None

        # Analyze interval pattern
        is_fixed, avg_interval = _analyze_keyframe_intervals(timestamps)

        return KeyframeInfo(
            timestamps=timestamps,
            count=len(timestamps),
            is_fixed_interval=is_fixed,
            avg_interval=avg_interval,
        )

    except subprocess.TimeoutExpired:
        logger.warning(f"Keyframe extraction timed out for {file_path}")
        return None
    except subprocess.CalledProcessError as e:
        logger.warning(f"Keyframe extraction failed for {file_path}: {e.stderr}")
        return None
    except Exception as e:
        logger.warning(f"Keyframe extraction error for {file_path}: {e}")
        return None


def _analyze_keyframe_intervals(timestamps: list[float]) -> tuple[bool, float | None]:
    """Analyze keyframe intervals to detect fixed GOP vs scene cuts.

    Args:
        timestamps: List of keyframe timestamps in seconds

    Returns:
        Tuple of (is_fixed_interval, average_interval)
        is_fixed_interval is True if keyframes appear at regular intervals (GOP)
    """
    if len(timestamps) < 2:
        return False, None

    # Calculate intervals between keyframes
    intervals = [timestamps[i + 1] - timestamps[i] for i in range(len(timestamps) - 1)]

    avg_interval = sum(intervals) / len(intervals)

    if avg_interval == 0:
        return False, 0.0

    # Check if intervals are consistent (within 20% of average)
    # Fixed GOP will have very consistent intervals
    # Scene cuts will have irregular intervals
    variance_threshold = 0.2  # 20% variance allowed for "fixed"
    consistent_count = sum(1 for interval in intervals if abs(interval - avg_interval) / avg_interval < variance_threshold)

    # If 80%+ of intervals are consistent, consider it fixed GOP
    is_fixed = consistent_count / len(intervals) >= 0.8

    return is_fixed, round(avg_interval, 3)

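To make the 20% variance / 80% consistency heuristic concrete, here is a small worked example with hypothetical timestamps; keyframes every 2 s count as a fixed GOP, while irregular scene-cut spacing does not:

# Illustrative timestamps, not taken from a real probe.
from media_engine.extractors.metadata.base import _analyze_keyframe_intervals

fixed_gop = [0.0, 2.0, 4.0, 6.0, 8.0]    # every interval equals the 2.0 s mean
scene_cuts = [0.0, 1.2, 5.7, 6.1, 11.4]  # intervals 1.2, 4.5, 0.4, 5.3 s; none near the 2.85 s mean

print(_analyze_keyframe_intervals(fixed_gop))   # (True, 2.0)
print(_analyze_keyframe_intervals(scene_cuts))  # (False, 2.85)
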
def parse_fps(video_stream: dict[str, Any]) -> float | None:
    """Parse frame rate from video stream."""
    # Try avg_frame_rate first
    fps_str = video_stream.get("avg_frame_rate", "")
    if fps_str and "/" in fps_str:
        num, den = fps_str.split("/")
        if int(den) != 0:
            return round(int(num) / int(den), 2)

    # Fall back to r_frame_rate
    fps_str = video_stream.get("r_frame_rate", "")
    if fps_str and "/" in fps_str:
        num, den = fps_str.split("/")
        if int(den) != 0:
            return round(int(num) / int(den), 2)

    return None


def parse_bit_depth(video_stream: dict[str, Any]) -> int | None:
    """Parse bit depth from video stream."""
    # Try bits_per_raw_sample first
    bits = video_stream.get("bits_per_raw_sample")
    if bits:
        try:
            return int(bits)
        except ValueError:
            pass

    # Parse from pixel format (e.g., yuv420p10le, yuv422p10be)
    pix_fmt = video_stream.get("pix_fmt", "")
    if pix_fmt:
        match = re.search(r"(\d+)(le|be)?$", pix_fmt)
        if match:
            depth = int(match.group(1))
            if depth in (10, 12, 16):
                return depth
        if pix_fmt in ("yuv420p", "yuv422p", "yuv444p", "yuvj420p", "yuvj422p"):
            return 8

    return None

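As a quick check of the pixel-format fallback, with hypothetical stream dicts shaped like ffprobe's JSON: the trailing digits of yuv420p10le give 10-bit, plain yuv420p falls back to 8-bit, and avg_frame_rate fractions reduce to rounded floats:

# Illustrative stream dicts only, mimicking ffprobe's JSON shape.
from media_engine.extractors.metadata.base import parse_bit_depth, parse_fps

hdr_stream = {"pix_fmt": "yuv420p10le", "avg_frame_rate": "30000/1001"}
sdr_stream = {"pix_fmt": "yuv420p", "avg_frame_rate": "25/1"}

print(parse_bit_depth(hdr_stream), parse_fps(hdr_stream))  # 10 29.97
print(parse_bit_depth(sdr_stream), parse_fps(sdr_stream))  # 8 25.0
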
def extract_timecode(tags: dict[str, str], video_stream: dict[str, Any] | None) -> str | None:
    """Extract start timecode from metadata."""
    tags_lower = {k.lower(): v for k, v in tags.items()}

    tc = tags_lower.get("timecode")
    if tc:
        return tc

    if video_stream:
        stream_tags = video_stream.get("tags", {})
        stream_tags_lower = {k.lower(): v for k, v in stream_tags.items()}
        tc = stream_tags_lower.get("timecode")
        if tc:
            return tc

    return None


def parse_creation_time(tags: dict[str, str], stream_tags: dict[str, str] | None = None) -> datetime | None:
    """Parse creation time from metadata tags.

    Checks format-level tags first, then stream tags as fallback.
    Normalizes keys to lowercase for case-insensitive lookup.
    """
    # Normalize keys to lowercase for case-insensitive lookup
    tags_lower = {k.lower(): v for k, v in tags.items()}

    time_str = (
        tags_lower.get("creation_time")
        or tags_lower.get("date")
        or tags_lower.get("com.apple.quicktime.creationdate")
        or tags_lower.get("date_recorded")
        or tags_lower.get("date-eng")  # Some MKV files
        or tags_lower.get("modification_date")  # Canon MXF files
    )

    # Fallback to stream tags if format tags don't have the date
    if not time_str and stream_tags:
        stream_tags_lower = {k.lower(): v for k, v in stream_tags.items()}
        time_str = stream_tags_lower.get("creation_time") or stream_tags_lower.get("date")

    if not time_str:
        return None

    # Handle timezone suffixes by stripping them for parsing
    # ffprobe can return: "2024-06-15T10:30:00.000000Z"
    # or "2024-06-15 10:30:00+0200"
    # or "2024-06-15T10:30:00+02:00"
    time_str_clean = time_str.strip()

    # Remove timezone offset for parsing (we'll treat as UTC if present)
    # Patterns like +0200, +02:00, -0500, -05:00
    tz_pattern = r"[+-]\d{2}:?\d{2}$"
    time_str_no_tz = re.sub(tz_pattern, "", time_str_clean)

    formats = [
        ("%Y-%m-%dT%H:%M:%S.%f", None),  # Variable microseconds
        ("%Y-%m-%dT%H:%M:%S", 19),
        ("%Y-%m-%d %H:%M:%S", 19),
        ("%Y:%m:%d %H:%M:%S", 19),  # EXIF format
        ("%Y/%m/%d %H:%M:%S", 19),
        ("%d/%m/%Y %H:%M:%S", 19),  # European format
        ("%Y-%m-%d", 10),  # Date only
    ]

    for fmt, length in formats:
        try:
            if length:
                return datetime.strptime(time_str_no_tz[:length], fmt)
            else:
                # Variable length (for microseconds)
                # Find the 'T' and parse accordingly
                if "T" in time_str_no_tz:
                    return datetime.strptime(time_str_no_tz.rstrip("Z"), fmt)
        except ValueError:
            continue

    logger.warning(f"Could not parse creation time: {time_str}")
    return None


def parse_iso6709(location: str) -> GPSCoordinates | None:
    """Parse ISO 6709 format GPS coordinates."""
    pattern = r"([+-]\d+\.?\d*)"
    matches = re.findall(pattern, location)

    if len(matches) >= 2:
        try:
            return GPSCoordinates(
                latitude=float(matches[0]),
                longitude=float(matches[1]),
                altitude=float(matches[2]) if len(matches) >= 3 else None,
            )
        except ValueError:
            pass

    return None


def parse_dms_coordinate(dms: str, ref: str | None) -> float | None:
    """Parse DMS (degrees;minutes;seconds) format to decimal degrees.

    Handles multiple formats:
    - 63;6;38.880 (all semicolons)
    - 63;6:38.880 (mixed semicolon and colon)
    """
    try:
        # Normalize: replace colons with semicolons
        normalized = dms.replace(":", ";")
        parts = normalized.split(";")
        if len(parts) != 3:
            return float(dms)

        degrees = float(parts[0])
        minutes = float(parts[1])
        seconds = float(parts[2])

        decimal = degrees + minutes / 60 + seconds / 3600

        if ref in ("S", "W"):
            decimal = -decimal

        return decimal
    except (ValueError, IndexError):
        return None

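A short worked example of the two coordinate parsers above, with made-up coordinates: an ISO 6709 string splits into signed decimal degrees, and a DMS value like 63;6;38.880 converts as 63 + 6/60 + 38.880/3600 = 63.1108:

# Illustrative values only.
from media_engine.extractors.metadata.base import parse_dms_coordinate, parse_iso6709

print(parse_iso6709("+47.6062-122.3321+056.0/"))
# GPSCoordinates(latitude=47.6062, longitude=-122.3321, altitude=56.0)

lat = parse_dms_coordinate("63;6;38.880", "N")
lon = parse_dms_coordinate("10;23:44.16", "W")  # mixed separators; western hemisphere becomes negative
print(round(lat, 4), round(lon, 4))  # 63.1108 -10.3956
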
def extract_gps_from_tags(tags: dict[str, str]) -> GPS | None:
    """Extract GPS coordinates from metadata tags."""
    tags_lower = {k.lower(): v for k, v in tags.items()}

    location = tags_lower.get("location") or tags_lower.get("com.apple.quicktime.location.iso6709") or tags_lower.get("gps")

    if location:
        coords = parse_iso6709(location)
        if coords:
            return GPS(
                latitude=coords.latitude,
                longitude=coords.longitude,
                altitude=coords.altitude,
            )

    lat = tags_lower.get("gps_latitude") or tags_lower.get("location-latitude")
    lon = tags_lower.get("gps_longitude") or tags_lower.get("location-longitude")

    if lat and lon:
        try:
            return GPS(
                latitude=float(lat),
                longitude=float(lon),
                altitude=float(tags_lower.get("gps_altitude", 0)) or None,
            )
        except ValueError:
            pass

    return None


def extract_color_space_from_stream(video_stream: dict[str, Any] | None, tags: dict[str, str]) -> ColorSpace | None:
    """Extract color space information from video stream and format tags."""
    transfer: str | None = None
    primaries: str | None = None
    matrix: str | None = None

    if video_stream:
        transfer = video_stream.get("color_transfer")
        primaries = video_stream.get("color_primaries")
        matrix = video_stream.get("color_space")

    tags_lower = {k.lower(): v for k, v in tags.items()}
    custom_gamma = tags_lower.get("com.apple.proapps.customgamma", "")
    if custom_gamma:
        parts = custom_gamma.split(".")
        if parts:
            transfer = parts[-1]

    if not (transfer or primaries or matrix):
        return None

    return ColorSpace(
        transfer=transfer,
        primaries=primaries,
        matrix=matrix,
        detection_method=DetectionMethod.METADATA,
    )


def detect_stereo_3d(probe_data: dict[str, Any]) -> Stereo3D | None:
    """Detect stereoscopic 3D video format.

    Detection methods:
    - MVC: Two H.264 video streams (base view + dependent view)
    - Metadata tags: stereo_mode, stereo3d tags

    Returns:
        Stereo3D info if 3D is detected, None otherwise.
    """
    streams = probe_data.get("streams", [])

    # Count video streams
    video_streams = [s for s in streams if s.get("codec_type") == "video"]

    # MVC detection: Two H.264 video streams where second has 0x0 dimensions
    # (dependent view references base view)
    if len(video_streams) >= 2:
        first_video = video_streams[0]
        second_video = video_streams[1]

        if first_video.get("codec_name") == "h264" and second_video.get("codec_name") == "h264":
            first_width = first_video.get("width", 0)
            second_width = second_video.get("width", 0)

            # MVC dependent view typically has 0x0 dimensions
            if first_width > 0 and second_width == 0:
                logger.info("Detected MVC stereoscopic 3D (two H.264 streams)")
                return Stereo3D(
                    mode=Stereo3DMode.MVC,
                    eye_count=2,
                    has_left_eye=True,
                    has_right_eye=True,
                    detection_method=DetectionMethod.METADATA,
                )

    # Check for stereo_mode metadata tag (used by some 360 cameras and encoders)
    tags = probe_data.get("format", {}).get("tags", {})
    tags_lower = {k.lower(): v for k, v in tags.items()}

    stereo_mode = tags_lower.get("stereo_mode") or tags_lower.get("stereo3d")
    if stereo_mode:
        mode_lower = stereo_mode.lower()
        if "side" in mode_lower or "sbs" in mode_lower:
            return Stereo3D(
                mode=Stereo3DMode.SIDE_BY_SIDE,
                detection_method=DetectionMethod.METADATA,
            )
        elif "top" in mode_lower or "over" in mode_lower or "tab" in mode_lower:
            return Stereo3D(
                mode=Stereo3DMode.TOP_BOTTOM,
                detection_method=DetectionMethod.METADATA,
            )

    return None


def build_base_metadata(
    probe_data: dict[str, Any],
    file_path: str,
) -> Metadata:
    """Build base metadata from ffprobe data without device-specific processing."""
    format_info = probe_data.get("format", {})
    tags = format_info.get("tags", {})

    video_stream = None
    audio_stream = None
    for stream in probe_data.get("streams", []):
        if stream.get("codec_type") == "video" and video_stream is None:
            video_stream = stream
        elif stream.get("codec_type") == "audio" and audio_stream is None:
            audio_stream = stream

    resolution = Resolution(
        width=video_stream.get("width", 0) if video_stream else 0,
        height=video_stream.get("height", 0) if video_stream else 0,
    )

    codec = Codec(
        video=video_stream.get("codec_name") if video_stream else None,
        audio=audio_stream.get("codec_name") if audio_stream else None,
    )

    video_codec: VideoCodec | None = None
    if video_stream:
        video_codec = VideoCodec(
            name=video_stream.get("codec_name", "unknown"),
            profile=video_stream.get("profile"),
            bit_depth=parse_bit_depth(video_stream),
            pixel_format=video_stream.get("pix_fmt"),
        )

    audio_info: AudioInfo | None = None
    if audio_stream:
        audio_info = AudioInfo(
            codec=audio_stream.get("codec_name"),
            sample_rate=int(audio_stream.get("sample_rate", 0)) or None,
            channels=audio_stream.get("channels"),
            bit_depth=audio_stream.get("bits_per_sample") or audio_stream.get("bits_per_raw_sample"),
            bitrate=int(audio_stream.get("bit_rate", 0)) or None,
        )

    fps = parse_fps(video_stream) if video_stream else None
    duration = float(format_info.get("duration", 0))
    bitrate = int(format_info.get("bit_rate", 0)) if format_info.get("bit_rate") else None
    file_size = os.path.getsize(file_path)

    # Get stream tags for fallback date extraction
    video_stream_tags = video_stream.get("tags", {}) if video_stream else None
    created_at = parse_creation_time(tags, video_stream_tags)
    timecode = extract_timecode(tags, video_stream)
    gps = extract_gps_from_tags(tags)
    color_space = extract_color_space_from_stream(video_stream, tags)

    # Detect stereoscopic 3D
    stereo_3d = detect_stereo_3d(probe_data)

    return Metadata(
        duration=duration,
        resolution=resolution,
        codec=codec,
        video_codec=video_codec,
        audio=audio_info,
        fps=fps,
        bitrate=bitrate,
        file_size=file_size,
        timecode=timecode,
        created_at=created_at,
        device=None,
        gps=gps,
        color_space=color_space,
        lens=None,
        stereo_3d=stereo_3d,
    )
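Putting the pieces together, a rough end-to-end sketch (the path is hypothetical, and the attribute names follow the Metadata and Resolution schemas imported at the top of the file):

# Illustrative only; "footage/dji_0042.mp4" is a made-up path.
from media_engine.extractors.metadata.base import build_base_metadata, run_ffprobe

path = "footage/dji_0042.mp4"
probe = run_ffprobe(path)                # full -show_format / -show_streams probe
meta = build_base_metadata(probe, path)  # generic metadata, no device-specific handling

print(meta.duration, meta.fps, meta.resolution.width, meta.resolution.height)
print(meta.created_at, meta.gps, meta.stereo_3d)  # None where the source carries no such tags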