media-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/clip.py +79 -0
- cli/faces.py +91 -0
- cli/metadata.py +68 -0
- cli/motion.py +77 -0
- cli/objects.py +94 -0
- cli/ocr.py +93 -0
- cli/scenes.py +57 -0
- cli/telemetry.py +65 -0
- cli/transcript.py +76 -0
- media_engine/__init__.py +7 -0
- media_engine/_version.py +34 -0
- media_engine/app.py +80 -0
- media_engine/batch/__init__.py +56 -0
- media_engine/batch/models.py +99 -0
- media_engine/batch/processor.py +1131 -0
- media_engine/batch/queue.py +232 -0
- media_engine/batch/state.py +30 -0
- media_engine/batch/timing.py +321 -0
- media_engine/cli.py +17 -0
- media_engine/config.py +674 -0
- media_engine/extractors/__init__.py +75 -0
- media_engine/extractors/clip.py +401 -0
- media_engine/extractors/faces.py +459 -0
- media_engine/extractors/frame_buffer.py +351 -0
- media_engine/extractors/frames.py +402 -0
- media_engine/extractors/metadata/__init__.py +127 -0
- media_engine/extractors/metadata/apple.py +169 -0
- media_engine/extractors/metadata/arri.py +118 -0
- media_engine/extractors/metadata/avchd.py +208 -0
- media_engine/extractors/metadata/avchd_gps.py +270 -0
- media_engine/extractors/metadata/base.py +688 -0
- media_engine/extractors/metadata/blackmagic.py +139 -0
- media_engine/extractors/metadata/camera_360.py +276 -0
- media_engine/extractors/metadata/canon.py +290 -0
- media_engine/extractors/metadata/dji.py +371 -0
- media_engine/extractors/metadata/dv.py +121 -0
- media_engine/extractors/metadata/ffmpeg.py +76 -0
- media_engine/extractors/metadata/generic.py +119 -0
- media_engine/extractors/metadata/gopro.py +256 -0
- media_engine/extractors/metadata/red.py +305 -0
- media_engine/extractors/metadata/registry.py +114 -0
- media_engine/extractors/metadata/sony.py +442 -0
- media_engine/extractors/metadata/tesla.py +157 -0
- media_engine/extractors/motion.py +765 -0
- media_engine/extractors/objects.py +245 -0
- media_engine/extractors/objects_qwen.py +754 -0
- media_engine/extractors/ocr.py +268 -0
- media_engine/extractors/scenes.py +82 -0
- media_engine/extractors/shot_type.py +217 -0
- media_engine/extractors/telemetry.py +262 -0
- media_engine/extractors/transcribe.py +579 -0
- media_engine/extractors/translate.py +121 -0
- media_engine/extractors/vad.py +263 -0
- media_engine/main.py +68 -0
- media_engine/py.typed +0 -0
- media_engine/routers/__init__.py +15 -0
- media_engine/routers/batch.py +78 -0
- media_engine/routers/health.py +93 -0
- media_engine/routers/models.py +211 -0
- media_engine/routers/settings.py +87 -0
- media_engine/routers/utils.py +135 -0
- media_engine/schemas.py +581 -0
- media_engine/utils/__init__.py +5 -0
- media_engine/utils/logging.py +54 -0
- media_engine/utils/memory.py +49 -0
- media_engine-0.1.0.dist-info/METADATA +276 -0
- media_engine-0.1.0.dist-info/RECORD +70 -0
- media_engine-0.1.0.dist-info/WHEEL +4 -0
- media_engine-0.1.0.dist-info/entry_points.txt +11 -0
- media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Blackmagic Design metadata extraction.
|
|
2
|
+
|
|
3
|
+
Handles Blackmagic cameras:
|
|
4
|
+
- Pocket Cinema Camera (4K, 6K, 6K Pro, 6K G2)
|
|
5
|
+
- URSA Mini Pro (4.6K, 12K)
|
|
6
|
+
- Micro Cinema Camera
|
|
7
|
+
- Production Camera 4K
|
|
8
|
+
|
|
9
|
+
Detection methods:
|
|
10
|
+
- .braw extension (Blackmagic RAW)
|
|
11
|
+
- com.apple.proapps.manufacturer: "Blackmagic Design"
|
|
12
|
+
- com.apple.proapps.cameraname: camera model
|
|
13
|
+
- com.apple.proapps.customgamma: LOG profile
|
|
14
|
+
|
|
15
|
+
Note: Full BRAW metadata requires Blackmagic RAW SDK (free download).
|
|
16
|
+
Without it, we detect the format but limited metadata from ffprobe.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from media_engine.schemas import (
|
|
24
|
+
ColorSpace,
|
|
25
|
+
DetectionMethod,
|
|
26
|
+
DeviceInfo,
|
|
27
|
+
MediaDeviceType,
|
|
28
|
+
Metadata,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
from .registry import get_tags_lower, register_extractor
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _parse_custom_gamma(gamma_string: str) -> str | None:
|
|
37
|
+
"""Parse Blackmagic custom gamma string to get LOG profile name.
|
|
38
|
+
|
|
39
|
+
Examples:
|
|
40
|
+
"com.blackmagic-design.productioncamera4k.filmlog" -> "filmlog"
|
|
41
|
+
"com.blackmagic-design.ursa.film" -> "film"
|
|
42
|
+
"""
|
|
43
|
+
if not gamma_string:
|
|
44
|
+
return None
|
|
45
|
+
|
|
46
|
+
parts = gamma_string.split(".")
|
|
47
|
+
if parts:
|
|
48
|
+
return parts[-1] # Return the last part as the profile name
|
|
49
|
+
|
|
50
|
+
return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class BlackmagicExtractor:
    """Metadata extractor for Blackmagic Design cameras."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Return True when the file looks like Blackmagic footage.

        A ``.braw`` suffix is sufficient on its own; otherwise the ProApps
        manufacturer tag, the generic make/manufacturer tag and the ProApps
        custom-gamma tag are inspected in that order.
        """
        # Blackmagic RAW is identified by extension alone.
        if Path(file_path).suffix.lower() == ".braw":
            return True

        tags = get_tags_lower(probe_data)

        # ProApps manufacturer tag.
        proapps_maker = tags.get("com.apple.proapps.manufacturer", "")
        if "BLACKMAGIC" in proapps_maker.upper():
            return True

        # Generic make tag.
        generic_maker = tags.get("make") or tags.get("manufacturer")
        if generic_maker and "BLACKMAGIC" in generic_maker.upper():
            return True

        # Custom gamma identifiers carry a Blackmagic signature.
        gamma_tag = tags.get("com.apple.proapps.customgamma", "")
        return "blackmagic" in gamma_tag.lower()

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Build a ``Metadata`` copy of *base_metadata* with Blackmagic details.

        The device info is replaced, and the colour space's transfer is taken
        from the custom-gamma tag when one is present; every other field is
        carried over from *base_metadata* unchanged.
        """
        tags = get_tags_lower(probe_data)

        # ProApps tags are preferred over the generic ones.
        maker = (
            tags.get("com.apple.proapps.manufacturer")
            or tags.get("make")
            or "Blackmagic Design"
        )
        camera = tags.get("com.apple.proapps.cameraname") or tags.get("model")

        if Path(file_path).suffix.lower() == ".braw":
            logger.info("BRAW detected. For full metadata, install Blackmagic RAW SDK.")

        device = DeviceInfo(
            make=maker,
            model=camera,
            software=tags.get("software"),
            type=MediaDeviceType.CINEMA_CAMERA,
            detection_method=DetectionMethod.METADATA,
            confidence=1.0,
        )

        # Start from the probed colour space; override the transfer function
        # when the custom-gamma tag yields a profile name.
        inherited_cs = base_metadata.color_space
        color_space = inherited_cs
        gamma_tag = tags.get("com.apple.proapps.customgamma", "")
        transfer = _parse_custom_gamma(gamma_tag) if gamma_tag else None
        if transfer:
            color_space = ColorSpace(
                transfer=transfer,
                primaries=inherited_cs.primaries if inherited_cs else None,
                matrix=inherited_cs.matrix if inherited_cs else None,
                detection_method=DetectionMethod.METADATA,
            )

        # Fields copied verbatim from the base metadata.
        passthrough = (
            "duration",
            "resolution",
            "codec",
            "video_codec",
            "audio",
            "fps",
            "bitrate",
            "file_size",
            "timecode",
            "created_at",
            "gps",
            "lens",
        )
        carried = {name: getattr(base_metadata, name) for name in passthrough}
        return Metadata(device=device, color_space=color_space, **carried)


# Register this extractor
register_extractor("blackmagic", BlackmagicExtractor())
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""Generic 360 camera metadata extractor.
|
|
2
|
+
|
|
3
|
+
Detects 360 cameras from various manufacturers:
|
|
4
|
+
- Insta360 (X3, X4, ONE RS, GO 3, etc.)
|
|
5
|
+
- Kandao QooCam (QooCam 8K, QooCam 3, etc.)
|
|
6
|
+
- GoPro MAX
|
|
7
|
+
- Ricoh Theta
|
|
8
|
+
- Samsung Gear 360
|
|
9
|
+
|
|
10
|
+
Detection methods:
|
|
11
|
+
- File extension (.insv, .insp for Insta360)
|
|
12
|
+
- Filename patterns (Q360_* for QooCam)
|
|
13
|
+
- Dual video streams with square resolution (unstitched fisheye)
|
|
14
|
+
- 2:1 aspect ratio (stitched equirectangular)
|
|
15
|
+
- Spherical metadata tags
|
|
16
|
+
- Handler names and make/model tags
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
import re
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from media_engine.schemas import (
|
|
25
|
+
DetectionMethod,
|
|
26
|
+
DeviceInfo,
|
|
27
|
+
MediaDeviceType,
|
|
28
|
+
Metadata,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
from .registry import get_tags_lower, register_extractor
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Known 360 camera identifiers.
#
# Each entry is keyed by an internal brand id and provides:
#   make        name reported as the device make
#   model       optional fixed model name for single-model entries
#   patterns    regexes searched case-insensitively in file names,
#               make/model tags and stream handler names
#   extensions  lower-case file suffixes that identify the brand on their own
CAMERA_360_BRANDS = {
    "insta360": {
        "make": "Insta360",
        "patterns": [r"INS", r"Insta360"],
        "extensions": [".insv", ".insp"],
    },
    "kandao": {
        "make": "Kandao",
        "patterns": [r"Q360_", r"QooCam", r"Kandao"],
        "extensions": [],
    },
    "gopro_max": {
        "make": "GoPro",
        "model": "MAX",
        "patterns": [r"GoPro MAX", r"GPMAX"],
        "extensions": [".360"],
    },
    "ricoh": {
        "make": "Ricoh",
        "patterns": [r"RICOH THETA", r"THETA"],
        "extensions": [],
    },
    "samsung": {
        "make": "Samsung",
        "patterns": [r"Gear 360", r"SM-R210", r"SM-C200"],
        "extensions": [],
    },
}


class Camera360Extractor:
    """Extract metadata from 360 cameras."""

    @staticmethod
    def _matches_known_brand(text: str) -> bool:
        """Return True if any known brand pattern occurs in *text*.

        Matching is case-insensitive and unanchored.
        NOTE(review): short patterns such as ``INS`` or ``THETA`` therefore
        match any text merely containing those letters (e.g. a file named
        "insights.mp4") - confirm that this breadth is intended.
        """
        return any(
            re.search(pattern, text, re.IGNORECASE)
            for brand_info in CAMERA_360_BRANDS.values()
            for pattern in brand_info.get("patterns", [])
        )

    @staticmethod
    def _video_streams(probe_data: dict[str, Any]) -> list[dict[str, Any]]:
        """Return the entries of ``probe_data["streams"]`` whose codec_type is video."""
        return [s for s in probe_data.get("streams", []) if s.get("codec_type") == "video"]

    @staticmethod
    def _handler_name(stream: dict[str, Any]) -> str:
        """Return the stream's ``handler_name`` tag, or "" when absent or null."""
        return (stream.get("tags") or {}).get("handler_name") or ""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if this is a 360 camera file.

        Checks, in order: brand-specific file extensions, brand patterns in
        the file name, in the make/model tags and in stream handler names,
        spherical metadata, and finally dual square video streams.
        """
        path = Path(file_path)
        tags = get_tags_lower(probe_data)

        # Brand-specific file extensions.
        suffix_lower = path.suffix.lower()
        if any(suffix_lower in info.get("extensions", []) for info in CAMERA_360_BRANDS.values()):
            return True

        # Brand patterns in the file name.
        if self._matches_known_brand(path.name):
            return True

        # Brand patterns in the make/model tags.
        make = tags.get("make", "") or tags.get("manufacturer", "")
        model = tags.get("model", "")
        if self._matches_known_brand(f"{make} {model}".strip()):
            return True

        # Brand patterns in stream handler names.
        for stream in probe_data.get("streams", []):
            if self._matches_known_brand(self._handler_name(stream)):
                return True

        # Spherical metadata, then unstitched dual-fisheye layout.
        if self._has_spherical_metadata(probe_data, tags):
            return True
        return self._has_dual_fisheye_streams(probe_data)

    def _has_spherical_metadata(self, probe_data: dict[str, Any], tags: dict[str, Any]) -> bool:
        """Check for spherical/360 video metadata tags.

        Returns True when any of the known spherical keys is present in
        *tags*, or when any stream side-data entry mentions "spherical".
        """
        spherical_keys = (
            "spherical",
            "spherical-video",
            "projection_type",
            "stereo_mode",
            "stitching_software",
        )
        if any(key in tags for key in spherical_keys):
            return True

        # Stream side data: an explicit "Spherical Mapping" entry, or any
        # entry whose textual form mentions "spherical".
        for stream in probe_data.get("streams", []):
            for data in stream.get("side_data_list", []):
                if data.get("side_data_type") == "Spherical Mapping":
                    return True
                if "spherical" in str(data).lower():
                    return True

        return False

    def _has_dual_fisheye_streams(self, probe_data: dict[str, Any]) -> bool:
        """Check for dual video streams with square resolution (unstitched 360)."""
        video_streams = self._video_streams(probe_data)
        if len(video_streams) < 2:
            return False

        square_streams = 0
        for stream in video_streams:
            # ``or 0`` guards against a null width/height in the probe data.
            width = stream.get("width") or 0
            height = stream.get("height") or 0
            if width > 0 and width == height:
                square_streams += 1

        return square_streams >= 2

    def extract(
        self,
        probe_data: dict[str, Any],
        file_path: str,
        base_metadata: Metadata,
    ) -> Metadata:
        """Extract 360 camera metadata.

        Sets ``base_metadata.device`` to a 360-camera ``DeviceInfo`` and
        returns the same (mutated) ``base_metadata`` object.
        """
        tags = get_tags_lower(probe_data)

        make, model = self._detect_brand_model(probe_data, file_path, tags)

        device = DeviceInfo(
            make=make,
            model=model,
            type=MediaDeviceType.CAMERA_360,
            detection_method=DetectionMethod.METADATA,
            confidence=1.0,
        )

        # The software field doubles as a note that the footage is still
        # unstitched (dual fisheye) rather than equirectangular.
        if self._has_dual_fisheye_streams(probe_data):
            device.software = "unstitched dual-fisheye"

        base_metadata.device = device
        return base_metadata

    def _detect_brand_model(
        self,
        probe_data: dict[str, Any],
        file_path: str,
        tags: dict[str, Any],
    ) -> tuple[str, str | None]:
        """Detect 360 camera brand and model.

        Returns:
            ``(make, model)``; *model* is ``None`` (never an empty string)
            when it cannot be determined.
        """
        path = Path(file_path)
        suffix_lower = path.suffix.lower()

        # File extension first. Fall back to the brand's fixed model (e.g.
        # "MAX" for GoPro .360 files) when resolution gives no answer.
        for brand_key, brand_info in CAMERA_360_BRANDS.items():
            if suffix_lower in brand_info.get("extensions", []):
                model = self._detect_model_from_resolution(probe_data, brand_key)
                return brand_info["make"], model or brand_info.get("model")

        # QooCam file-name prefix; case-insensitive to agree with detect().
        if re.search(r"Q360_", path.name, re.IGNORECASE):
            return "Kandao", self._detect_qoocam_model(probe_data)

        # Make/model tags; an empty model tag is treated as unknown.
        make = tags.get("make", "") or tags.get("manufacturer", "")
        model = tags.get("model", "") or None

        if make:
            # Normalize known brands.
            make_upper = make.upper()
            if "INSTA" in make_upper:
                return "Insta360", model or self._detect_model_from_resolution(probe_data, "insta360")
            if "GOPRO" in make_upper:
                return "GoPro", model or "MAX"
            if "RICOH" in make_upper or "THETA" in make_upper:
                return "Ricoh", model
            if "SAMSUNG" in make_upper:
                return "Samsung", model
            if "KANDAO" in make_upper or "QOOCAM" in make_upper:
                return "Kandao", model or self._detect_qoocam_model(probe_data)

            return make, model

        # Handler name containing "INS" is taken as Insta360.
        for stream in probe_data.get("streams", []):
            if "INS" in self._handler_name(stream).upper():
                return "Insta360", self._detect_model_from_resolution(probe_data, "insta360")

        # Fallback for detected 360 video.
        return "Unknown 360 Camera", None

    def _detect_model_from_resolution(self, probe_data: dict[str, Any], brand: str) -> str | None:
        """Detect model based on resolution.

        Only the ``"insta360"`` brand has resolution rules; every other
        brand yields ``None``.
        """
        if brand != "insta360":
            return None

        for stream in self._video_streams(probe_data):
            largest_side = max(stream.get("width") or 0, stream.get("height") or 0)
            if largest_side >= 3840:
                return "X3/X4"
            if largest_side >= 2880:
                return "ONE RS"
            if largest_side >= 1920:
                return "ONE X/X2"

        return None

    def _detect_qoocam_model(self, probe_data: dict[str, Any]) -> str | None:
        """Detect QooCam model based on resolution and codec."""
        for stream in self._video_streams(probe_data):
            width = stream.get("width") or 0
            codec = stream.get("codec_name", "")

            if width >= 3840:
                if codec == "hevc":
                    return "8K"  # QooCam 8K uses HEVC
                return "8K/3"
            if width >= 2880:
                return "3"

        return None


# Register the extractor
register_extractor("camera_360", Camera360Extractor())
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""Canon metadata extraction.
|
|
2
|
+
|
|
3
|
+
Handles Canon cameras:
|
|
4
|
+
- Cinema EOS: C70, C300, C500, etc.
|
|
5
|
+
- EOS R series: R5, R6, R3, etc.
|
|
6
|
+
- DSLRs: 5D, 1DX, etc.
|
|
7
|
+
|
|
8
|
+
Detection methods:
|
|
9
|
+
- make tag: "Canon"
|
|
10
|
+
- XML sidecar files (.XML)
|
|
11
|
+
|
|
12
|
+
Canon XML sidecar files contain:
|
|
13
|
+
- Device info (Manufacturer, ModelName)
|
|
14
|
+
- GPS coordinates (Location element)
|
|
15
|
+
- Creation date (CreationDate element)
|
|
16
|
+
|
|
17
|
+
Canon Cinema EOS MXF filename format:
|
|
18
|
+
- Example: A012C001_230515_BY9X.MXF or A012C001_230515BY9X.MXF
|
|
19
|
+
- The YYMMDD date is embedded after the clip number
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import logging
|
|
23
|
+
import re
|
|
24
|
+
import xml.etree.ElementTree as ET
|
|
25
|
+
from datetime import datetime, timezone
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
from media_engine.schemas import (
|
|
30
|
+
GPS,
|
|
31
|
+
DetectionMethod,
|
|
32
|
+
DeviceInfo,
|
|
33
|
+
MediaDeviceType,
|
|
34
|
+
Metadata,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
from .base import SidecarMetadata
|
|
38
|
+
from .registry import get_tags_lower, register_extractor
|
|
39
|
+
|
|
40
|
+
logger = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
# Pattern for Canon Cinema EOS MXF filenames with embedded date
|
|
43
|
+
# Format: A###C###H<YYMMDD><XX>_CANON.MXF
|
|
44
|
+
# Example: A012C001H200529BY_CANON.MXF -> date is 200529 (2020-05-29)
|
|
45
|
+
CANON_DATE_PATTERN = re.compile(r"H(\d{6})", re.IGNORECASE)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _parse_date_from_filename(file_path: str) -> datetime | None:
|
|
49
|
+
"""Extract recording date from Canon MXF filename.
|
|
50
|
+
|
|
51
|
+
Canon Cinema EOS cameras encode the date in the filename:
|
|
52
|
+
- A012C001_230515_BY9X.MXF -> 2023-05-15
|
|
53
|
+
- A012C001_230515BY9X.MXF -> 2023-05-15
|
|
54
|
+
- CLIP_230515.MXF -> 2023-05-15
|
|
55
|
+
|
|
56
|
+
The date format is YYMMDD (2-digit year, month, day).
|
|
57
|
+
"""
|
|
58
|
+
filename = Path(file_path).stem
|
|
59
|
+
|
|
60
|
+
match = CANON_DATE_PATTERN.search(filename)
|
|
61
|
+
if not match:
|
|
62
|
+
return None
|
|
63
|
+
|
|
64
|
+
date_str = match.group(1)
|
|
65
|
+
try:
|
|
66
|
+
# Parse YYMMDD format
|
|
67
|
+
year = int(date_str[0:2])
|
|
68
|
+
month = int(date_str[2:4])
|
|
69
|
+
day = int(date_str[4:6])
|
|
70
|
+
|
|
71
|
+
# Convert 2-digit year to 4-digit (assume 20xx for now)
|
|
72
|
+
full_year = 2000 + year if year < 70 else 1900 + year
|
|
73
|
+
|
|
74
|
+
# Validate date components
|
|
75
|
+
if not (1 <= month <= 12 and 1 <= day <= 31):
|
|
76
|
+
return None
|
|
77
|
+
|
|
78
|
+
return datetime(full_year, month, day, tzinfo=timezone.utc)
|
|
79
|
+
except (ValueError, IndexError):
|
|
80
|
+
return None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Namespace prefix map for Canon "VideoClip" sidecar documents.
_CANON_XML_NS = {"canon": "http://www.canon.com/ns/VideoClip"}

# Elements that may hold the recording date, in priority order: Canon-namespaced
# names first, then the same names in any namespace.
_CANON_DATE_XPATHS = (
    ".//canon:CreationDate",
    ".//canon:StartDate",
    ".//canon:Date",
    ".//{*}CreationDate",
    ".//{*}StartDate",
    ".//{*}Date",
)


def _find_canon_elem(parent: ET.Element, tag: str, *, descend: bool = False) -> ET.Element | None:
    """Find *tag* under *parent*, preferring the Canon namespace over any other.

    Uses explicit ``is None`` checks: an ``Element`` without children is
    falsy, so chaining ``find(...) or find(...)`` would discard a match.
    """
    prefix = ".//" if descend else ""
    found = parent.find(f"{prefix}canon:{tag}", _CANON_XML_NS)
    if found is None:
        found = parent.find(f"{prefix}{{*}}{tag}")
    return found


def _elem_text(elem: ET.Element | None) -> str | None:
    """Return the element's text, or ``None`` for a missing element or empty text."""
    if elem is None or not elem.text:
        return None
    return elem.text


def _parse_sidecar_date(root: ET.Element) -> datetime | None:
    """Return the first parseable date among the candidate date elements."""
    for xpath in _CANON_DATE_XPATHS:
        date_text = _elem_text(root.find(xpath, _CANON_XML_NS))
        if date_text is None:
            continue
        date_text = date_text.strip()
        try:
            if "T" in date_text:
                # ISO timestamp (2023-05-15T10:30:00).
                # NOTE(review): a timestamp without an offset yields a naive
                # datetime, unlike the date-only branch - confirm intended.
                return datetime.fromisoformat(date_text.replace("Z", "+00:00"))
            # Date only (2023-05-15), taken as midnight UTC.
            return datetime.strptime(date_text, "%Y-%m-%d").replace(tzinfo=timezone.utc)
        except ValueError:
            continue
    return None


def _parse_xml_sidecar(video_path: str) -> SidecarMetadata | None:
    """Parse Canon XML sidecar file for additional metadata.

    Canon cameras create XML sidecar files with naming pattern:
    - Video: A012C001_230515_BY9X.MXF
    - XML: A012C001_230515_BY9X.XML

    Args:
        video_path: Path of the video file; the sidecar is looked up next to
            it with an ``.XML`` or ``.xml`` suffix.

    Returns:
        A ``SidecarMetadata`` holding whichever of device, GPS and creation
        date could be read, or ``None`` when no sidecar exists, it cannot be
        parsed, or it contains none of those values.
    """
    path = Path(video_path)

    candidates = (path.with_suffix(".XML"), path.with_suffix(".xml"))
    xml_path = next((candidate for candidate in candidates if candidate.exists()), None)
    if xml_path is None:
        return None

    try:
        root = ET.parse(xml_path).getroot()

        device: DeviceInfo | None = None
        gps: GPS | None = None

        # Device info.
        device_elem = _find_canon_elem(root, "Device", descend=True)
        if device_elem is not None:
            manufacturer = _elem_text(_find_canon_elem(device_elem, "Manufacturer"))
            model_name = _elem_text(_find_canon_elem(device_elem, "ModelName"))

            if manufacturer or model_name:
                device = DeviceInfo(
                    make=manufacturer,
                    model=model_name,
                    software=None,
                    type=MediaDeviceType.CAMERA,
                    detection_method=DetectionMethod.XML_SIDECAR,
                    confidence=1.0,
                )

        # Creation date.
        created_at: datetime | None = _parse_sidecar_date(root)

        # GPS from the Location element; latitude and longitude are both
        # required, altitude is optional.
        location_elem = _find_canon_elem(root, "Location", descend=True)
        if location_elem is not None:
            lat = _elem_text(_find_canon_elem(location_elem, "Latitude"))
            lon = _elem_text(_find_canon_elem(location_elem, "Longitude"))
            alt = _elem_text(_find_canon_elem(location_elem, "Altitude"))

            if lat and lon:
                try:
                    gps = GPS(
                        latitude=float(lat),
                        longitude=float(lon),
                        altitude=float(alt) if alt else None,
                    )
                except ValueError:
                    # Non-numeric coordinates: keep the remaining metadata.
                    logger.debug(f"Ignoring malformed GPS values in Canon XML sidecar {xml_path}")

        if device or gps or created_at:
            return SidecarMetadata(device=device, gps=gps, created_at=created_at)
        return None

    except ET.ParseError as e:
        logger.warning(f"Failed to parse Canon XML sidecar {xml_path}: {e}")
        return None
    except Exception as e:
        # Best-effort: a broken sidecar must not abort metadata extraction.
        logger.warning(f"Error reading Canon XML sidecar {xml_path}: {e}")
        return None
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class CanonExtractor:
    """Metadata extractor for Canon cameras."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if file is from a Canon camera.

        Returns True when a make/manufacturer/company_name tag mentions
        Canon, or when an XML sidecar next to the file either has a root tag
        in a canon.com namespace or names Canon as the device manufacturer.
        """
        tags = get_tags_lower(probe_data)

        # Make tag (various names used by different formats).
        make = tags.get("make") or tags.get("manufacturer") or tags.get("company_name")
        if make and "CANON" in make.upper():
            return True

        # Canon XML sidecar next to the video file.
        path = Path(file_path)
        for sidecar_path in (path.with_suffix(".XML"), path.with_suffix(".xml")):
            if not sidecar_path.exists():
                continue
            try:
                root = ET.parse(sidecar_path).getroot()
            except (ET.ParseError, OSError) as exc:
                # Detection is best-effort: an unreadable or malformed
                # sidecar only means this signal is unavailable.
                logger.debug(f"Skipping unreadable XML sidecar {sidecar_path}: {exc}")
                continue

            # Canon namespace on the root element.
            if "canon.com" in str(root.tag).lower():
                return True

            # Device manufacturer; case-insensitive like the tag check above.
            device = root.find(".//{*}Device")
            if device is not None:
                mfr_elem = device.find(".//{*}Manufacturer")
                if mfr_elem is not None and mfr_elem.text and "CANON" in mfr_elem.text.upper():
                    return True

        return False

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Extract Canon-specific metadata.

        Returns a new ``Metadata`` in which device, GPS and creation date may
        be refined from the XML sidecar or the file name; all other fields are
        copied from *base_metadata*.
        """
        tags = get_tags_lower(probe_data)

        # Basic device info from tags.
        make = tags.get("make") or tags.get("manufacturer") or "Canon"
        model = tags.get("model") or tags.get("model_name")

        # XML sidecar for detailed metadata.
        sidecar = _parse_xml_sidecar(file_path)

        # Device info: prefer the sidecar.
        if sidecar and sidecar.device:
            device = sidecar.device
        else:
            device = DeviceInfo(
                make=make,
                model=model,
                software=tags.get("software"),
                type=MediaDeviceType.CAMERA,
                detection_method=DetectionMethod.METADATA,
                confidence=1.0,
            )

        # GPS: prefer the sidecar.
        gps = sidecar.gps if sidecar and sidecar.gps else base_metadata.gps

        # Creation date: prefer base_metadata, then sidecar, then filename.
        created_at = base_metadata.created_at
        if created_at is None and sidecar and sidecar.created_at:
            created_at = sidecar.created_at
            logger.debug(f"Got creation date from XML sidecar: {created_at}")
        if created_at is None:
            created_at = _parse_date_from_filename(file_path)
            if created_at:
                logger.debug(f"Parsed creation date from filename: {created_at}")

        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=base_metadata.codec,
            video_codec=base_metadata.video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=base_metadata.file_size,
            timecode=base_metadata.timecode,
            created_at=created_at,
            device=device,
            gps=gps,
            color_space=base_metadata.color_space,
            lens=base_metadata.lens,
        )


# Register this extractor
register_extractor("canon", CanonExtractor())
|