media-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/clip.py +79 -0
- cli/faces.py +91 -0
- cli/metadata.py +68 -0
- cli/motion.py +77 -0
- cli/objects.py +94 -0
- cli/ocr.py +93 -0
- cli/scenes.py +57 -0
- cli/telemetry.py +65 -0
- cli/transcript.py +76 -0
- media_engine/__init__.py +7 -0
- media_engine/_version.py +34 -0
- media_engine/app.py +80 -0
- media_engine/batch/__init__.py +56 -0
- media_engine/batch/models.py +99 -0
- media_engine/batch/processor.py +1131 -0
- media_engine/batch/queue.py +232 -0
- media_engine/batch/state.py +30 -0
- media_engine/batch/timing.py +321 -0
- media_engine/cli.py +17 -0
- media_engine/config.py +674 -0
- media_engine/extractors/__init__.py +75 -0
- media_engine/extractors/clip.py +401 -0
- media_engine/extractors/faces.py +459 -0
- media_engine/extractors/frame_buffer.py +351 -0
- media_engine/extractors/frames.py +402 -0
- media_engine/extractors/metadata/__init__.py +127 -0
- media_engine/extractors/metadata/apple.py +169 -0
- media_engine/extractors/metadata/arri.py +118 -0
- media_engine/extractors/metadata/avchd.py +208 -0
- media_engine/extractors/metadata/avchd_gps.py +270 -0
- media_engine/extractors/metadata/base.py +688 -0
- media_engine/extractors/metadata/blackmagic.py +139 -0
- media_engine/extractors/metadata/camera_360.py +276 -0
- media_engine/extractors/metadata/canon.py +290 -0
- media_engine/extractors/metadata/dji.py +371 -0
- media_engine/extractors/metadata/dv.py +121 -0
- media_engine/extractors/metadata/ffmpeg.py +76 -0
- media_engine/extractors/metadata/generic.py +119 -0
- media_engine/extractors/metadata/gopro.py +256 -0
- media_engine/extractors/metadata/red.py +305 -0
- media_engine/extractors/metadata/registry.py +114 -0
- media_engine/extractors/metadata/sony.py +442 -0
- media_engine/extractors/metadata/tesla.py +157 -0
- media_engine/extractors/motion.py +765 -0
- media_engine/extractors/objects.py +245 -0
- media_engine/extractors/objects_qwen.py +754 -0
- media_engine/extractors/ocr.py +268 -0
- media_engine/extractors/scenes.py +82 -0
- media_engine/extractors/shot_type.py +217 -0
- media_engine/extractors/telemetry.py +262 -0
- media_engine/extractors/transcribe.py +579 -0
- media_engine/extractors/translate.py +121 -0
- media_engine/extractors/vad.py +263 -0
- media_engine/main.py +68 -0
- media_engine/py.typed +0 -0
- media_engine/routers/__init__.py +15 -0
- media_engine/routers/batch.py +78 -0
- media_engine/routers/health.py +93 -0
- media_engine/routers/models.py +211 -0
- media_engine/routers/settings.py +87 -0
- media_engine/routers/utils.py +135 -0
- media_engine/schemas.py +581 -0
- media_engine/utils/__init__.py +5 -0
- media_engine/utils/logging.py +54 -0
- media_engine/utils/memory.py +49 -0
- media_engine-0.1.0.dist-info/METADATA +276 -0
- media_engine-0.1.0.dist-info/RECORD +70 -0
- media_engine-0.1.0.dist-info/WHEEL +4 -0
- media_engine-0.1.0.dist-info/entry_points.txt +11 -0
- media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
"""GoPro metadata extractor.
|
|
2
|
+
|
|
3
|
+
Handles GoPro cameras:
|
|
4
|
+
- HERO5, HERO6, HERO7, HERO8, HERO9, HERO10, HERO11, HERO12, HERO13
|
|
5
|
+
- MAX (360 camera)
|
|
6
|
+
- Session series
|
|
7
|
+
|
|
8
|
+
Detection methods:
|
|
9
|
+
- handler_name containing "GoPro"
|
|
10
|
+
- firmware tag pattern (e.g., "HD7.01.01.90.00" for HERO7)
|
|
11
|
+
- Filename patterns (GH*, GX*, GOPR*)
|
|
12
|
+
|
|
13
|
+
GoPro files contain:
|
|
14
|
+
- gpmd stream: GPS, accelerometer, gyroscope data
|
|
15
|
+
- Timecode
|
|
16
|
+
- Color space (usually BT.709)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
import struct
|
|
21
|
+
import subprocess
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from media_engine.schemas import (
|
|
26
|
+
GPS,
|
|
27
|
+
DetectionMethod,
|
|
28
|
+
DeviceInfo,
|
|
29
|
+
GPSTrack,
|
|
30
|
+
GPSTrackPoint,
|
|
31
|
+
MediaDeviceType,
|
|
32
|
+
Metadata,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
from .registry import get_tags_lower, register_extractor
|
|
36
|
+
|
|
37
|
+
logger = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
# GoPro model mapping from firmware prefix
|
|
40
|
+
GOPRO_MODELS = {
|
|
41
|
+
"HD5": "HERO5 Black",
|
|
42
|
+
"HD6": "HERO6 Black",
|
|
43
|
+
"HD7": "HERO7 Black",
|
|
44
|
+
"HD8": "HERO8 Black",
|
|
45
|
+
"HD9": "HERO9 Black",
|
|
46
|
+
"H10": "HERO10 Black",
|
|
47
|
+
"H11": "HERO11 Black",
|
|
48
|
+
"H12": "HERO12 Black",
|
|
49
|
+
"H13": "HERO13 Black",
|
|
50
|
+
"H21": "HERO Session",
|
|
51
|
+
"H22": "HERO5 Session",
|
|
52
|
+
"HX": "MAX",
|
|
53
|
+
"H19": "MAX", # Another MAX identifier
|
|
54
|
+
"FS": "Fusion",
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _parse_firmware_model(firmware: str) -> str | None:
|
|
59
|
+
"""Parse GoPro model from firmware string.
|
|
60
|
+
|
|
61
|
+
Examples:
|
|
62
|
+
- "HD7.01.01.90.00" -> HERO7 Black
|
|
63
|
+
- "H10.01.01.40.00" -> HERO10 Black
|
|
64
|
+
"""
|
|
65
|
+
if not firmware:
|
|
66
|
+
return None
|
|
67
|
+
|
|
68
|
+
# Try direct prefix match
|
|
69
|
+
for prefix, model in GOPRO_MODELS.items():
|
|
70
|
+
if firmware.upper().startswith(prefix):
|
|
71
|
+
return model
|
|
72
|
+
|
|
73
|
+
return None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _parse_gps5_samples(data: bytes) -> list[tuple[float, float, float]]:
    """Decode every GPS5 sample found in a raw GPMD byte stream.

    The stream is scanned for the ``SCAL`` and ``GPS5`` FourCC tags. Each tag
    is followed by a 4-byte header (type char, element size, big-endian
    16-bit repeat count) and then the payload. Only payloads of type ``l``
    (signed 32-bit) are decoded.

    A ``SCAL`` entry may hold one divisor for the whole sample or one divisor
    per field; GPS5 fields are (lat, lon, alt, speed2d, speed3d), so latitude,
    longitude and altitude each use their own divisor when one is present.

    Args:
        data: Raw bytes of the telemetry stream.

    Returns:
        List of ``(latitude, longitude, altitude)`` floats in stream order,
        unvalidated and unrounded.
    """
    import re  # function-scope: this module's import block does not include ``re``

    samples: list[tuple[float, float, float]] = []
    scales: tuple[int, ...] = (1,)  # unscaled until a SCAL entry is seen
    resume_at = 0  # tag-like bytes before this offset sit inside a decoded payload

    # finditer walks the buffer at C speed instead of one Python step per byte.
    for match in re.finditer(rb"SCAL|GPS5", data):
        offset = match.start()
        if offset < resume_at:
            continue
        if offset >= len(data) - 8:
            # Not enough room for the header plus at least one payload byte.
            break

        type_byte, size, count = struct.unpack_from(">BBH", data, offset + 4)
        if type_byte != ord("l"):
            continue
        payload = offset + 8

        if match.group() == b"SCAL":
            available = min(count, (len(data) - payload) // 4)
            if size == 4 and available >= 1:
                scales = struct.unpack_from(f">{available}i", data, payload)
            continue

        if size != 20:  # a GPS5 sample is 5 x 4-byte ints
            continue

        # Pad with the last divisor so a single-value SCAL applies to all fields.
        lat_div, lon_div, alt_div = (scales + (scales[-1],) * 2)[:3]
        complete = min(count, (len(data) - payload) // 20)
        resume_at = payload + complete * 20
        if 0 in (lat_div, lon_div, alt_div):
            # A zero divisor is unusable; skip this block instead of raising
            # and losing the points decoded so far.
            continue

        for index in range(complete):
            lat_raw, lon_raw, alt_raw, _speed2d, _speed3d = struct.unpack_from(
                ">5i", data, payload + index * 20
            )
            samples.append((lat_raw / lat_div, lon_raw / lon_div, alt_raw / alt_div))

    return samples


def _extract_gpmd_gps(file_path: str) -> tuple[GPS | None, GPSTrack | None]:
    """Extract GPS from GoPro GPMD stream.

    GoPro stores telemetry in a binary stream with FourCC tags.
    GPS data is under DEVC -> STRM -> GPS5 (lat, lon, alt, speed2d, speed3d).

    The stream is dumped with ffmpeg (must be on PATH). The first few data
    streams are tried in order and the first one that yields valid GPS points
    is used, because the GPMD track is not guaranteed to be the first data
    stream in the container (GoPro files can carry other data tracks, such as
    a timecode track, ahead of it).

    Args:
        file_path: Path to the video file.

    Returns:
        Tuple of (first GPS point, full GPS track). The track is None when
        fewer than two distinct points were found; both are None when no GPS
        data could be extracted or any step failed (failures are logged at
        debug level, never raised).
    """
    max_data_streams = 4  # upper bound on ffmpeg invocations per file

    try:
        gps_points: list[GPSTrackPoint] = []

        for stream_index in range(max_data_streams):
            # Extract the data stream using ffmpeg
            cmd = [
                "ffmpeg",
                "-y",
                "-i",
                file_path,
                "-codec",
                "copy",
                "-map",
                f"0:d:{stream_index}",
                "-f",
                "rawvideo",
                "pipe:1",
            ]
            result = subprocess.run(cmd, capture_output=True, timeout=30, check=False)
            if result.returncode != 0 or not result.stdout:
                continue

            for lat, lon, alt in _parse_gps5_samples(result.stdout):
                # Validate coordinates (lat == 0 is treated as "no fix")
                if not (-90 <= lat <= 90 and -180 <= lon <= 180 and lat != 0):
                    continue
                point = GPSTrackPoint(
                    latitude=round(lat, 6),
                    longitude=round(lon, 6),
                    altitude=round(alt, 1) if alt != 0 else None,
                )
                # Dedupe consecutive identical points
                if (
                    not gps_points
                    or point.latitude != gps_points[-1].latitude
                    or point.longitude != gps_points[-1].longitude
                ):
                    gps_points.append(point)

            if gps_points:
                break

        if not gps_points:
            return None, None

        first_gps = GPS(
            latitude=gps_points[0].latitude,
            longitude=gps_points[0].longitude,
            altitude=gps_points[0].altitude,
        )
        track = GPSTrack(points=gps_points, source="gpmd") if len(gps_points) > 1 else None
        logger.info(f"Extracted {len(gps_points)} GPS points from GoPro GPMD")
        return first_gps, track

    except Exception as e:
        # Deliberate best-effort boundary: GPS is optional metadata, so a
        # missing ffmpeg binary, a timeout or malformed data must not abort
        # the rest of the extraction.
        logger.debug(f"Failed to extract GPS from GPMD: {e}")
        return None, None
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class GoProExtractor:
    """Metadata extractor for GoPro cameras."""

    @staticmethod
    def _has_gopro_filename(stem: str) -> bool:
        """Return True when *stem* follows a GoPro-style file name.

        Accepted shapes (case-insensitive, extra characters may follow):
        - ``GH``/``GX`` + two alphanumeric chapter characters + four digits
        - ``GOPR`` + four digits

        A bare prefix test is not enough: stems such as ``GH5_clip`` or
        ``GHOST`` start with ``GH`` without being GoPro recordings.
        """
        name = stem.upper()

        def is_counter(text: str) -> bool:
            # Four ASCII digits; str.isdigit() alone also accepts other
            # Unicode digit characters.
            return len(text) == 4 and text.isascii() and text.isdigit()

        if name.startswith(("GH", "GX")):
            chapter = name[2:4]
            return len(chapter) == 2 and chapter.isascii() and chapter.isalnum() and is_counter(name[4:8])
        if name.startswith("GOPR"):
            return is_counter(name[4:8])
        return False

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if file is from a GoPro camera.

        Checks, in order: the format-level ``firmware`` tag against the known
        prefixes in ``GOPRO_MODELS``, any stream ``handler_name`` containing
        "GoPro", any stream ``encoder`` containing "GoPro", and finally the
        file name.

        Args:
            probe_data: Parsed ffprobe JSON output.
            file_path: Path to the video file.

        Returns:
            True if any of the checks matches.
        """
        path = Path(file_path)
        tags = get_tags_lower(probe_data)

        # Check firmware tag
        firmware = tags.get("firmware", "")
        if firmware:
            firmware_upper = firmware.upper()
            if any(firmware_upper.startswith(prefix) for prefix in GOPRO_MODELS):
                return True

        # Check handler_name for "GoPro"
        for stream in probe_data.get("streams", []):
            handler = stream.get("tags", {}).get("handler_name", "")
            if "GoPro" in handler:
                return True

        # Check encoder tag
        for stream in probe_data.get("streams", []):
            encoder = stream.get("tags", {}).get("encoder", "")
            if "GoPro" in encoder:
                return True

        # Check filename pattern (GHzzxxxx, GXzzxxxx, GOPRxxxx)
        return self._has_gopro_filename(path.stem)

    def extract(
        self,
        probe_data: dict[str, Any],
        file_path: str,
        base_metadata: Metadata,
    ) -> Metadata:
        """Extract GoPro-specific metadata.

        Sets ``device`` from the firmware tag and replaces ``gps`` /
        ``gps_track`` with values decoded from the GPMD stream when available.
        ``base_metadata`` is updated in place and returned.

        Args:
            probe_data: Parsed ffprobe JSON output.
            file_path: Path to the video file.
            base_metadata: Device-agnostic metadata to enrich.

        Returns:
            The same ``base_metadata`` object with GoPro fields filled in.
        """
        tags = get_tags_lower(probe_data)

        # Get firmware and parse model
        firmware = tags.get("firmware", "")
        model = _parse_firmware_model(firmware)

        # Determine device type (MAX is 360 camera)
        device_type = MediaDeviceType.ACTION_CAMERA
        if model and "MAX" in model:
            device_type = MediaDeviceType.CAMERA_360

        device = DeviceInfo(
            make="GoPro",
            model=model,
            software=firmware if firmware else None,
            type=device_type,
            detection_method=DetectionMethod.METADATA,
            confidence=1.0,
        )

        # Extract GPS from GPMD stream
        gps, gps_track = _extract_gpmd_gps(file_path)

        # Use extracted GPS or keep base
        if gps is None:
            gps = base_metadata.gps
        if gps_track is None:
            gps_track = base_metadata.gps_track

        # Update metadata
        base_metadata.device = device
        base_metadata.gps = gps
        base_metadata.gps_track = gps_track

        return base_metadata


# Register the extractor
register_extractor("gopro", GoProExtractor())
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""RED Digital Cinema metadata extractor.
|
|
2
|
+
|
|
3
|
+
Handles RED cameras:
|
|
4
|
+
- RED ONE (2007-2013)
|
|
5
|
+
- SCARLET (2011-2016)
|
|
6
|
+
- EPIC (2010-2016)
|
|
7
|
+
- DRAGON/Weapon (2014-2018)
|
|
8
|
+
- Helium (2016-present)
|
|
9
|
+
- KOMODO (2020-present)
|
|
10
|
+
- V-Raptor (2021-present)
|
|
11
|
+
|
|
12
|
+
R3D files store metadata in a proprietary header format.
|
|
13
|
+
ffprobe CANNOT read R3D natively, so we parse the header directly.
|
|
14
|
+
|
|
15
|
+
Header structure (reverse-engineered):
|
|
16
|
+
- 0x00-0x03: Size
|
|
17
|
+
- 0x04-0x07: Magic "RED2"
|
|
18
|
+
- 0x08+: TLV-like blocks with type codes
|
|
19
|
+
|
|
20
|
+
Notable fields found in header:
|
|
21
|
+
- Timecode (format HH:MM:SS:FF)
|
|
22
|
+
- Date (format YYYYMMDD)
|
|
23
|
+
- Firmware version
|
|
24
|
+
- Camera model (SCARLET, EPIC, KOMODO, etc.)
|
|
25
|
+
- Serial number
|
|
26
|
+
- Lens info (make, model, focal length, aperture)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
import logging
|
|
30
|
+
import re
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
from media_engine.schemas import (
|
|
35
|
+
Codec,
|
|
36
|
+
ColorSpace,
|
|
37
|
+
DetectionMethod,
|
|
38
|
+
DeviceInfo,
|
|
39
|
+
LensInfo,
|
|
40
|
+
MediaDeviceType,
|
|
41
|
+
Metadata,
|
|
42
|
+
VideoCodec,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
from .registry import register_extractor
|
|
46
|
+
|
|
47
|
+
logger = logging.getLogger(__name__)
|
|
48
|
+
|
|
49
|
+
# Lower-case keyword -> display name for RED sensors and camera bodies.
RED_MODELS = {
    # Sensors
    "dragon": "RED Dragon",
    "helium": "RED Helium",
    "gemini": "RED Gemini",
    "monstro": "RED Monstro",
    # Camera bodies
    "komodo": "KOMODO",
    "raptor": "V-RAPTOR",
    "ranger": "RANGER",
    "weapon": "WEAPON",
    "epic": "EPIC",
    "scarlet": "SCARLET",
    "raven": "RAVEN",
    "red one": "RED ONE",
    # Platform generations
    "dsmc2": "DSMC2",
    "dsmc3": "DSMC3",
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _parse_r3d_header(file_path: str) -> dict[str, Any] | None:
|
|
69
|
+
"""Parse R3D file header for metadata.
|
|
70
|
+
|
|
71
|
+
Returns dict with extracted metadata or None if not a valid R3D.
|
|
72
|
+
"""
|
|
73
|
+
try:
|
|
74
|
+
with open(file_path, "rb") as f:
|
|
75
|
+
# Read header (first 1KB should contain all metadata)
|
|
76
|
+
header = f.read(1024)
|
|
77
|
+
|
|
78
|
+
if len(header) < 8:
|
|
79
|
+
return None
|
|
80
|
+
|
|
81
|
+
# Check magic
|
|
82
|
+
magic = header[4:8]
|
|
83
|
+
if magic != b"RED2":
|
|
84
|
+
return None
|
|
85
|
+
|
|
86
|
+
result: dict[str, Any] = {"make": "RED"}
|
|
87
|
+
|
|
88
|
+
# Find timecode (format like "01:00:26:06")
|
|
89
|
+
tc_match = re.search(rb"\d{2}:\d{2}:\d{2}:\d{2}", header)
|
|
90
|
+
if tc_match:
|
|
91
|
+
tc_str = tc_match.group().decode("ascii")
|
|
92
|
+
result["timecode"] = tc_str
|
|
93
|
+
|
|
94
|
+
# Find date (format YYYYMMDD)
|
|
95
|
+
date_match = re.search(rb"20\d{6}", header)
|
|
96
|
+
if date_match:
|
|
97
|
+
date_str = date_match.group().decode("ascii")
|
|
98
|
+
# Format as YYYY-MM-DD
|
|
99
|
+
result["date"] = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}"
|
|
100
|
+
|
|
101
|
+
# Find firmware version (X.X.XX pattern)
|
|
102
|
+
fw_match = re.search(rb"\d+\.\d+\.\d+", header)
|
|
103
|
+
if fw_match:
|
|
104
|
+
result["firmware"] = fw_match.group().decode("ascii")
|
|
105
|
+
|
|
106
|
+
# Find camera model - look for known RED models
|
|
107
|
+
models = [
|
|
108
|
+
b"SCARLET",
|
|
109
|
+
b"EPIC",
|
|
110
|
+
b"DRAGON",
|
|
111
|
+
b"WEAPON",
|
|
112
|
+
b"HELIUM",
|
|
113
|
+
b"KOMODO",
|
|
114
|
+
b"RAPTOR",
|
|
115
|
+
b"RED ONE",
|
|
116
|
+
b"GEMINI",
|
|
117
|
+
b"MONSTRO",
|
|
118
|
+
b"RANGER",
|
|
119
|
+
b"RAVEN",
|
|
120
|
+
b"DSMC2",
|
|
121
|
+
b"DSMC3",
|
|
122
|
+
]
|
|
123
|
+
for model in models:
|
|
124
|
+
if model in header.upper():
|
|
125
|
+
result["model"] = model.decode("ascii")
|
|
126
|
+
break
|
|
127
|
+
|
|
128
|
+
# Find serial number (pattern like 221LS102VTLCZ)
|
|
129
|
+
serial_match = re.search(rb"\d{3}[A-Z]{2}\d{3}[A-Z0-9]+", header)
|
|
130
|
+
if serial_match:
|
|
131
|
+
result["serial"] = serial_match.group().decode("ascii")
|
|
132
|
+
|
|
133
|
+
# Find lens info (look for known lens brands/patterns)
|
|
134
|
+
lens_patterns = [
|
|
135
|
+
rb"Canon [^\x00]+",
|
|
136
|
+
rb"Zeiss [^\x00]+",
|
|
137
|
+
rb"Leica [^\x00]+",
|
|
138
|
+
rb"Sigma [^\x00]+",
|
|
139
|
+
rb"Cooke [^\x00]+",
|
|
140
|
+
rb"Angenieux [^\x00]+",
|
|
141
|
+
rb"Fujinon [^\x00]+",
|
|
142
|
+
rb"RED [^\x00]*PRO [^\x00]*",
|
|
143
|
+
]
|
|
144
|
+
for pattern in lens_patterns:
|
|
145
|
+
lens_match = re.search(pattern, header)
|
|
146
|
+
if lens_match:
|
|
147
|
+
lens_str = lens_match.group().decode("utf-8", errors="ignore")
|
|
148
|
+
# Clean up null bytes and control chars
|
|
149
|
+
lens_str = re.sub(r"[\x00-\x1f]", "", lens_str).strip()
|
|
150
|
+
if len(lens_str) > 3:
|
|
151
|
+
result["lens_name"] = lens_str
|
|
152
|
+
break
|
|
153
|
+
|
|
154
|
+
# Find lens serial (pattern like 0018-0046-00E6)
|
|
155
|
+
lens_serial_match = re.search(rb"\d{4}-\d{4}-\d{4}", header)
|
|
156
|
+
if lens_serial_match:
|
|
157
|
+
result["lens_serial"] = lens_serial_match.group().decode("ascii")
|
|
158
|
+
|
|
159
|
+
return result if len(result) > 1 else None
|
|
160
|
+
|
|
161
|
+
except Exception as e:
|
|
162
|
+
logger.warning(f"Failed to parse R3D header: {e}")
|
|
163
|
+
return None
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class RedExtractor:
    """Extract metadata from RED cameras."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if this is a RED R3D file.

        Detection methods:
        1. File extension (.R3D)
        2. RED folder structure (a path component ending in .RDM or .RDC)

        The R3D magic bytes are not inspected here; ``extract`` validates them
        through ``_parse_r3d_header``. ``probe_data`` is not used.

        Args:
            probe_data: Parsed ffprobe JSON output (ignored).
            file_path: Path to the video file.

        Returns:
            True if the path looks like RED media.
        """
        path = Path(file_path)

        # Check file extension
        if path.suffix.upper() == ".R3D":
            return True

        # Check folder structure (RDM = RED Digital Magazine, RDC = RED Digital Clip)
        return any(part.upper().endswith((".RDM", ".RDC")) for part in path.parts)

    @staticmethod
    def _build_lens_info(r3d_data: dict[str, Any]) -> LensInfo | None:
        """Build ``LensInfo`` from parsed header data, or None without a lens name."""
        lens_name = r3d_data.get("lens_name")
        if not lens_name:
            return None

        # Try to parse focal length from lens name
        focal_length: float | None = None
        focal_match = re.search(r"(\d+)-(\d+)mm|(\d+)mm", lens_name)
        if focal_match:
            if focal_match.group(3):
                focal_length = float(focal_match.group(3))
            elif focal_match.group(1):
                # Zoom lens - use wide end
                focal_length = float(focal_match.group(1))

        # Try to parse aperture. The capture is restricted to a well-formed
        # decimal ("2.8", ".95", "4") so float() cannot raise on text such as
        # "f." or "f/2.8." where a looser "[\d.]+" would capture stray dots.
        aperture: float | None = None
        aperture_match = re.search(r"f/?(\d*\.?\d+)", lens_name)
        if aperture_match:
            aperture = float(aperture_match.group(1))

        # Store lens make/model/serial in iris field as LensInfo lacks those fields
        lens_serial = r3d_data.get("lens_serial")
        iris_info = f"{lens_name} (S/N: {lens_serial})" if lens_serial else lens_name

        return LensInfo(
            focal_length=focal_length,
            aperture=aperture,
            iris=iris_info,  # Store full lens info here
            detection_method=DetectionMethod.METADATA,
        )

    def extract(
        self,
        probe_data: dict[str, Any],
        file_path: str,
        base_metadata: Metadata,
    ) -> Metadata:
        """Extract RED-specific metadata from R3D file.

        When the header cannot be parsed, ``base_metadata`` is returned with
        only ``device`` replaced (make "RED", confidence 0.8). Otherwise a new
        ``Metadata`` is built from the header plus selected ``base_metadata``
        fields, with REDCODE codec and REDWideGamutRGB / Log3G10 colour space.

        Args:
            probe_data: Parsed ffprobe JSON output (not used).
            file_path: Path to the video file.
            base_metadata: Device-agnostic metadata to enrich.

        Returns:
            Metadata with RED-specific fields.
        """
        path = Path(file_path)

        # Parse R3D header directly (ffprobe cannot read R3D)
        r3d_data = _parse_r3d_header(file_path)

        if r3d_data is None:
            # Return minimal metadata
            base_metadata.device = DeviceInfo(
                make="RED",
                model=None,
                type=MediaDeviceType.CINEMA_CAMERA,
                detection_method=DetectionMethod.METADATA,
                confidence=0.8,
            )
            return base_metadata

        # Build device info
        # Note: serial_number stored in software field as DeviceInfo doesn't have serial
        serial = r3d_data.get("serial")
        firmware = r3d_data.get("firmware")
        software_str = firmware
        if serial:
            software_str = f"{firmware} (S/N: {serial})" if firmware else f"S/N: {serial}"

        device = DeviceInfo(
            make="RED",
            model=r3d_data.get("model"),
            software=software_str,
            type=MediaDeviceType.CINEMA_CAMERA,
            detection_method=DetectionMethod.METADATA,
            confidence=1.0,
        )

        # Build lens info
        lens = self._build_lens_info(r3d_data)

        # Get timecode string
        timecode: str | None = r3d_data.get("timecode")

        # Get file size
        file_size = path.stat().st_size if path.exists() else base_metadata.file_size

        # R3D uses REDCODE compression
        video_codec = VideoCodec(
            name="REDCODE",
            profile="RAW",
            bit_depth=16,  # RED shoots 16-bit
        )

        # Color space - RED shoots in REDWideGamutRGB / Log3G10
        color_space = ColorSpace(
            primaries="REDWideGamutRGB",
            transfer="Log3G10",
            matrix=None,
            detection_method=DetectionMethod.METADATA,
        )

        # NOTE(review): only the fields listed below are carried over from
        # base_metadata; anything else it holds (e.g. gps_track) is not copied
        # - confirm this is intended.
        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=Codec(video="REDCODE"),
            video_codec=video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=file_size,
            timecode=timecode,
            created_at=base_metadata.created_at,
            device=device,
            gps=base_metadata.gps,
            color_space=color_space,
            lens=lens,
        )


# Register the extractor
register_extractor("red", RedExtractor())
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Manufacturer detection and extractor registry.
|
|
2
|
+
|
|
3
|
+
This module provides a registry pattern for metadata extractors.
|
|
4
|
+
Each manufacturer module registers itself with detect() and extract() functions.
|
|
5
|
+
|
|
6
|
+
To add a new manufacturer:
|
|
7
|
+
1. Create a new module (e.g., panasonic.py)
|
|
8
|
+
2. Implement detect(probe_data, file_path) -> bool
|
|
9
|
+
3. Implement extract(probe_data, file_path, base_metadata) -> Metadata
|
|
10
|
+
4. Import the module in __init__.py to trigger registration
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any, Protocol
|
|
15
|
+
|
|
16
|
+
from media_engine.schemas import Metadata
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class MetadataExtractor(Protocol):
    """Structural interface every manufacturer-specific extractor satisfies.

    Any object exposing ``detect`` and ``extract`` with these signatures can
    be passed to ``register_extractor``; no inheritance is required.
    """

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Decide whether this extractor is responsible for a file.

        Args:
            probe_data: ffprobe output, already parsed from JSON.
            file_path: Location of the video file.

        Returns:
            True when the file should be handled by this extractor.
        """
        ...

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Enrich generic metadata with manufacturer-specific details.

        Args:
            probe_data: ffprobe output, already parsed from JSON.
            file_path: Location of the video file.
            base_metadata: Device-agnostic metadata derived from ffprobe.

        Returns:
            Metadata including the device-specific fields.
        """
        ...
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Process-wide list of (name, extractor) pairs, kept in registration order.
_extractors: list[tuple[str, MetadataExtractor]] = []


def register_extractor(name: str, extractor: MetadataExtractor) -> None:
    """Add an extractor to the end of the global registry.

    Registration order matters: ``get_extractor`` consults entries in the
    order they were added.

    Args:
        name: Short label for the extractor (e.g., "dji", "sony", "apple").
        extractor: Object providing ``detect()`` and ``extract()``.
    """
    entry = (name, extractor)
    _extractors.append(entry)
    logger.debug(f"Registered metadata extractor: {name}")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_extractor(probe_data: dict[str, Any], file_path: str) -> tuple[str, MetadataExtractor] | None:
    """Pick the first registered extractor that claims the file.

    Extractors are asked in registration order. An extractor whose
    ``detect()`` raises is logged as a warning and skipped, so one faulty
    extractor cannot block the others.

    Args:
        probe_data: ffprobe output, already parsed from JSON.
        file_path: Location of the video file.

    Returns:
        ``(name, extractor)`` for the first match, or None when nothing matches.
    """
    for label, candidate in _extractors:
        try:
            if not candidate.detect(probe_data, file_path):
                continue
            logger.debug(f"Matched extractor: {label}")
            return label, candidate
        except Exception as e:
            logger.warning(f"Extractor {label} detect() failed: {e}")
            continue

    return None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def list_extractors() -> list[str]:
    """Return the names of all registered extractors, in registration order."""
    return [entry[0] for entry in _extractors]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# Shared helper: normalise format-level tag names for detection code
def get_tags_lower(probe_data: dict[str, Any]) -> dict[str, str]:
    """Return the ``format.tags`` mapping with every key lower-cased.

    Missing ``format`` or ``tags`` entries yield an empty dict. Values are
    passed through untouched; if two keys differ only by case, the later one
    overwrites the earlier value.
    """
    raw_tags = probe_data.get("format", {}).get("tags", {})

    lowered: dict[str, str] = {}
    for key, value in raw_tags.items():
        lowered[key.lower()] = value
    return lowered
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def get_make_model(probe_data: dict[str, Any]) -> tuple[str | None, str | None]:
    """Look up camera make and model in the usual format-tag locations.

    Make is taken from ``make``, ``com.apple.quicktime.make`` or
    ``manufacturer``; model from ``model``, ``com.apple.quicktime.model`` or
    ``model_name``. In each group the first non-empty tag wins.

    Returns:
        Tuple of (make, model) - either may be None
    """
    tags = get_tags_lower(probe_data)

    def first_truthy(*keys: str) -> str | None:
        # Mirrors an ``a or b or c`` chain: first truthy value, otherwise
        # whatever the last lookup produced.
        value = None
        for key in keys:
            value = tags.get(key)
            if value:
                break
        return value

    make = first_truthy("make", "com.apple.quicktime.make", "manufacturer")
    model = first_truthy("model", "com.apple.quicktime.model", "model_name")
    return make, model
|