smart-media-manager 0.5.43a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smart_media_manager/__init__.py +15 -0
- smart_media_manager/cli.py +4941 -0
- smart_media_manager/format_compatibility.json +628 -0
- smart_media_manager/format_registry.json +11874 -0
- smart_media_manager/format_registry.py +491 -0
- smart_media_manager/format_rules.py +677 -0
- smart_media_manager/metadata_registry.json +1113 -0
- smart_media_manager/metadata_registry.py +229 -0
- smart_media_manager/uuid_generator.py +140 -0
- smart_media_manager-0.5.43a4.dist-info/METADATA +340 -0
- smart_media_manager-0.5.43a4.dist-info/RECORD +15 -0
- smart_media_manager-0.5.43a4.dist-info/WHEEL +5 -0
- smart_media_manager-0.5.43a4.dist-info/entry_points.txt +2 -0
- smart_media_manager-0.5.43a4.dist-info/licenses/LICENSE +21 -0
- smart_media_manager-0.5.43a4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Metadata Field Registry Module for Smart Media Manager.
|
|
3
|
+
|
|
4
|
+
Provides UUID-based metadata field identification for unified field recognition
|
|
5
|
+
across different tools and formats (ExifTool, FFmpeg, libmagic, etc.).
|
|
6
|
+
|
|
7
|
+
This complements the format UUID system by providing a translation layer for
|
|
8
|
+
metadata field names, enabling programmatic metadata operations across tools.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Dict, List, Optional
|
|
15
|
+
|
|
16
|
+
# Module-level logger, named after this module's import path.
LOG = logging.getLogger(__name__)

# Global registry cache. Stays None until load_metadata_registry() runs; after
# that it holds the parsed registry (or an empty dict when the file is missing
# or could not be loaded), so the JSON file is read at most once per process.
_METADATA_REGISTRY: Optional[Dict[str, Any]] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def load_metadata_registry() -> Dict[str, Any]:
    """Load the metadata field registry from metadata_registry.json.

    The file is looked up next to this module. The parsed result is cached in
    the module-level ``_METADATA_REGISTRY`` so later calls return the same
    dictionary without touching the filesystem again.

    Loading is best-effort: a missing file (logged as a warning) or any
    failure while reading/validating it (logged as an error) results in an
    empty registry being cached and returned instead of an exception.

    Returns:
        Dictionary containing all metadata field mappings with UUIDs, or an
        empty dictionary when the registry is unavailable or malformed.
    """
    global _METADATA_REGISTRY
    if _METADATA_REGISTRY is not None:
        return _METADATA_REGISTRY

    registry_path = Path(__file__).parent / "metadata_registry.json"
    if not registry_path.exists():
        LOG.warning(f"Metadata registry not found at {registry_path}, using empty registry")
        _METADATA_REGISTRY = {}
        return _METADATA_REGISTRY

    try:
        # JSON is UTF-8 by specification; never rely on the locale's default
        # encoding, which differs between platforms.
        with open(registry_path, encoding="utf-8") as f:
            loaded = json.load(f)
        # Valid JSON that is not an object (list, string, ...) cannot serve as
        # a registry; report that explicitly rather than via an AttributeError.
        if not isinstance(loaded, dict):
            raise TypeError(f"expected a JSON object at top level, got {type(loaded).__name__}")
        field_count = sum(len(fields) for fields in loaded.get("metadata_fields", {}).values())
    except Exception as exc:
        # Deliberately broad: the registry is optional, so any read, decode or
        # structure problem degrades to an empty registry instead of crashing.
        LOG.error(f"Failed to load metadata registry: {exc}")
        _METADATA_REGISTRY = {}
        return _METADATA_REGISTRY

    # Publish to the cache only after the payload has been validated.
    _METADATA_REGISTRY = loaded
    LOG.info(f"Loaded metadata registry with {field_count} field definitions")
    return _METADATA_REGISTRY
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def lookup_metadata_field_uuid(tool_name: str, field_name: str) -> Optional[str]:
    """Look up metadata field UUID from tool-specific field name.

    Scans every category and field of the registry and returns the UUID of the
    first field whose ``tool_mappings`` entry for ``tool_name`` contains
    ``field_name``.

    Args:
        tool_name: Name of the tool (exiftool, ffprobe, ffmpeg, etc.)
        field_name: The field name as reported by the tool

    Returns:
        The matching field's ``uuid`` value as a string (the registry
        convention is a ``-M`` suffix), or None if no field matches or the
        first matching field has no ``uuid`` entry.

    Example (actual values depend on the bundled registry):
        >>> lookup_metadata_field_uuid("exiftool", "EXIF:DateTimeOriginal")
        '3d4f8a9c-1e7b-5c3d-9a2f-4e8c1b7d3a9f-M'
        >>> lookup_metadata_field_uuid("ffprobe", "creation_time")
        '3d4f8a9c-1e7b-5c3d-9a2f-4e8c1b7d3a9f-M'
    """
    registry = load_metadata_registry()
    metadata_fields = registry.get("metadata_fields", {})

    # Search through all categories and fields
    for category in metadata_fields.values():
        for field_info in category.values():
            tool_fields = field_info.get("tool_mappings", {}).get(tool_name)
            if tool_fields is None:
                # Tool not mapped for this field (or mapped to JSON null).
                continue

            if isinstance(tool_fields, str):
                # A bare string must match exactly: ``in`` on a str is a
                # substring search, so "time" would match "creation_time".
                matched = field_name == tool_fields
            else:
                matched = field_name in tool_fields

            if matched:
                uuid_value = field_info.get("uuid")
                # Ensure we return str or None, not Any
                return str(uuid_value) if uuid_value is not None else None

    return None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def get_canonical_field_name(field_uuid: str) -> Optional[str]:
    """Resolve a metadata field UUID to its canonical field name.

    Args:
        field_uuid: Metadata field UUID exactly as stored in the registry
            (registry convention is a ``-M`` suffix).

    Returns:
        The ``canonical`` value of the first registry field whose ``uuid``
        equals ``field_uuid``, converted to ``str``; None when no field has
        that UUID or the matching field carries no ``canonical`` entry.

    Example (actual values depend on the bundled registry):
        >>> get_canonical_field_name("3d4f8a9c-1e7b-5c3d-9a2f-4e8c1b7d3a9f-M")
        'creation_datetime'
    """
    categories = load_metadata_registry().get("metadata_fields", {})

    for fields in categories.values():
        for entry in fields.values():
            if entry.get("uuid") != field_uuid:
                continue
            # First UUID match decides the result, even when it has no name.
            name = entry.get("canonical")
            if name is None:
                return None
            return str(name)

    return None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def get_tool_field_names(field_uuid: str, tool_name: str) -> List[str]:
    """Return the names a given tool uses for the field identified by a UUID.

    Args:
        field_uuid: Metadata field UUID exactly as stored in the registry
            (registry convention is a ``-M`` suffix).
        tool_name: Tool whose field names are wanted (exiftool, ffprobe, etc.)

    Returns:
        The tool's names for the first registry field with that UUID, each
        converted to ``str``. An empty list when the UUID is unknown, the tool
        has no mapping for the field, or the mapping is not a list.

    Example (actual values depend on the bundled registry):
        >>> get_tool_field_names("3d4f8a9c-...-M", "exiftool")
        ['EXIF:DateTimeOriginal', 'EXIF:CreateDate', 'XMP:CreateDate']
    """
    categories = load_metadata_registry().get("metadata_fields", {})

    for fields in categories.values():
        for entry in fields.values():
            if entry.get("uuid") != field_uuid:
                continue
            names = entry.get("tool_mappings", {}).get(tool_name, [])
            # Only a real list is trusted; anything else yields no names.
            if not isinstance(names, list):
                return []
            return [str(item) for item in names]

    return []
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def translate_field_name(source_tool: str, source_field: str, target_tool: str) -> Optional[str]:
    """Map one tool's field name onto another tool's name for the same field.

    The source name is first resolved to a registry UUID, then the target
    tool's names for that UUID are fetched.

    Args:
        source_tool: Tool that reported the field (exiftool, ffprobe, etc.)
        source_field: Field name as reported by the source tool
        target_tool: Tool to translate the field name for

    Returns:
        The first name the target tool uses for the field, or None when the
        source field has no UUID or the target tool has no names for it.

    Example (actual values depend on the bundled registry):
        >>> translate_field_name("exiftool", "EXIF:DateTimeOriginal", "ffprobe")
        'creation_time'
        >>> translate_field_name("ffprobe", "creation_time", "exiftool")
        'EXIF:DateTimeOriginal'
    """
    resolved_uuid = lookup_metadata_field_uuid(source_tool, source_field)
    if resolved_uuid:
        candidates = get_tool_field_names(resolved_uuid, target_tool)
        if candidates:
            # The registry lists names in order; the first one is used.
            return candidates[0]
    return None
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def normalize_metadata_dict(tool_name: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Re-key a tool's metadata dictionary by registry UUID.

    Each key is looked up as a field name of ``tool_name``. Keys with a known
    UUID are replaced by that UUID; keys without one are kept under
    ``"unmapped:<tool_name>:<field_name>"`` (and logged at debug level).
    Values are passed through untouched. If several input keys resolve to the
    same UUID, the one iterated last wins.

    Args:
        tool_name: Tool that produced the metadata (exiftool, ffprobe, etc.)
        metadata: Dictionary with tool-specific field names

    Returns:
        New dictionary with UUID (or ``unmapped:`` prefixed) keys and the
        original values.

    Example (actual values depend on the bundled registry):
        >>> normalize_metadata_dict("ffprobe", {"creation_time": "2024-01-15"})
        {'3d4f8a9c-1e7b-5c3d-9a2f-4e8c1b7d3a9f-M': '2024-01-15'}
    """
    result: Dict[str, Any] = {}

    for original_key, payload in metadata.items():
        target_key = lookup_metadata_field_uuid(tool_name, original_key)
        if not target_key:
            # Preserve unmapped fields under a namespaced version of their name.
            LOG.debug(f"No UUID mapping for {tool_name} field: {original_key}")
            target_key = f"unmapped:{tool_name}:{original_key}"
        result[target_key] = payload

    return result
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def get_field_description(field_uuid: str) -> Optional[str]:
    """Fetch the human-readable description stored for a metadata field.

    Args:
        field_uuid: Metadata field UUID exactly as stored in the registry
            (registry convention is a ``-M`` suffix).

    Returns:
        The ``description`` value of the first registry field whose ``uuid``
        equals ``field_uuid``, converted to ``str``; None when no field has
        that UUID or the matching field has no ``description`` entry.
    """
    categories = load_metadata_registry().get("metadata_fields", {})

    for fields in categories.values():
        for entry in fields.values():
            if entry.get("uuid") != field_uuid:
                continue
            # First UUID match decides the result, even without a description.
            text = entry.get("description")
            if text is None:
                return None
            return str(text)

    return None
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def get_all_field_uuids() -> List[str]:
    """Collect the UUID of every metadata field defined in the registry.

    Fields without a ``uuid`` key are skipped. Values are returned exactly as
    stored, in registry iteration order (category by category).

    Returns:
        List of all field UUIDs; empty when the registry has no fields.
    """
    categories = load_metadata_registry().get("metadata_fields", {})
    return [
        entry["uuid"]
        for fields in categories.values()
        for entry in fields.values()
        if "uuid" in entry
    ]
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""UUID generator for format identification with parameters.
|
|
2
|
+
|
|
3
|
+
Generates deterministic UUIDs based on format parameters:
|
|
4
|
+
- Codec (h264, hevc, vp9, etc.)
|
|
5
|
+
- Bit depth (8, 10, 12, 16)
|
|
6
|
+
- Pixel format (yuv420p, yuv422p, yuv444p, rgb24, etc.)
|
|
7
|
+
- Profile (high, main, main10, etc.)
|
|
8
|
+
- Sample rate (for audio)
|
|
9
|
+
- Sample format (for audio)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import hashlib
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def generate_video_uuid(codec: str, bit_depth: Optional[int] = None, pix_fmt: Optional[str] = None, profile: Optional[str] = None) -> str:
    """Generate a deterministic UUID for video format.

    The codec name is matched case-insensitively. Known codecs (h264, hevc,
    av1, vp9) map to fixed base UUIDs; any other codec gets a base made of the
    first 36 hex characters of the SHA-256 digest of its lowercased name
    (note: that fallback contains no hyphens, unlike the fixed UUIDs).

    Args:
        codec: Codec name (h264, hevc, vp9, av1, etc.)
        bit_depth: Bit depth (8, 10, 12, 16)
        pix_fmt: Pixel format (yuv420p, yuv422p, yuv444p, rgb24, etc.)
        profile: Codec profile (high, main, main10, etc.); lowercased in the
            output

    Returns:
        ``{base}-{bitdepth}bit-{pixfmt}-{profile}-V`` where every parameter
        that is None or otherwise falsy (0, "") is omitted. With no
        parameters at all the result is just ``{base}-V``.

    Examples:
        >>> generate_video_uuid("h264", 8, "yuv420p", "high")
        'b2e62c4a-6122-548c-9bfa-0fcf3613942a-8bit-yuv420p-high-V'
        >>> generate_video_uuid("h264")
        'b2e62c4a-6122-548c-9bfa-0fcf3613942a-V'
    """
    # Use base codec UUID (for backward compatibility with existing UUIDs)
    base_uuids = {
        "h264": "b2e62c4a-6122-548c-9bfa-0fcf3613942a",
        "hevc": "faf4b553-de47-5bc8-80ea-d026a2571456",
        "av1": "c69693cd-1fcd-5608-a8df-9476a00cfa9b",
        "vp9": "4c9b19a7-ec9f-57c2-98ca-3ac8432b27cc",
    }

    codec_key = codec.lower()
    base_uuid = base_uuids.get(codec_key)
    if base_uuid is None:
        # Unknown codec: hash the *normalized* name so that "ProRes" and
        # "prores" identify the same format, matching the case-insensitive
        # table lookup above.
        base_uuid = hashlib.sha256(codec_key.encode()).hexdigest()[:36]

    # Build parameter suffix from whichever parameters were supplied.
    params = []
    if bit_depth:
        params.append(f"{bit_depth}bit")
    if pix_fmt:
        params.append(pix_fmt)
    if profile:
        params.append(profile.lower())

    # With no parameters this collapses to "{base}-V" (backward compatible).
    return "-".join([base_uuid, *params, "V"])
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def generate_audio_uuid(codec: str, sample_rate: Optional[int] = None, sample_fmt: Optional[str] = None) -> str:
    """Build a deterministic identifier string for an audio format.

    Args:
        codec: Codec name (aac, opus, vorbis, etc.); lowercased in the output
        sample_rate: Sample rate in Hz (44100, 48000, etc.)
        sample_fmt: Sample format (s16, s24, s32, f32, etc.)

    Returns:
        ``{codec}-{samplerate}-{samplefmt}-A`` where a sample rate or sample
        format that is None or otherwise falsy (0, "") is left out.

    Examples:
        >>> generate_audio_uuid("aac", 48000, "s16")
        'aac-48000-s16-A'
        >>> generate_audio_uuid("aac")
        'aac-A'
    """
    segments = [codec.lower()]
    segments += [str(sample_rate)] if sample_rate else []
    segments += [sample_fmt] if sample_fmt else []
    # Trailing "A" marks the identifier as an audio format.
    segments.append("A")
    return "-".join(segments)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Leading substrings of common FFmpeg pixel-format names (yuv420p, yuvj420p,
# rgb24, rgba, bgr24, bgra, gbrp10le, gray, gray10le, nv12, p010le, ...).
_PIX_FMT_PREFIXES = ("yuv", "rgb", "bgr", "gbr", "argb", "abgr", "gray", "nv12", "nv21", "p010", "p016")

# Single-letter type markers that terminate an identifier; never a profile.
_TYPE_SUFFIXES = ("V", "A", "I", "C", "R")

_HEX_DIGITS = frozenset("0123456789abcdef")


def parse_video_uuid(uuid: str) -> dict:
    """Parse a video UUID to extract parameters.

    Two base layouts are recognised:

    * a hyphenated UUID occupying the first five ``-`` separated groups, and
    * the un-hyphenated 36-character hex identifier that
      ``generate_video_uuid`` emits for codecs outside its built-in table.

    Everything after the base is classified token by token: ``<digits>bit``
    becomes the bit depth, tokens starting with a known pixel-format prefix
    become the pixel format, the type markers ``V``/``A``/``I``/``C``/``R``
    and empty tokens are ignored, and any other token is taken as the profile
    (the last such token wins).

    Args:
        uuid: Video UUID string

    Returns:
        Dict with keys: base_uuid, bit_depth, pix_fmt, profile. Entries that
        could not be determined are None; if no base can be identified, all
        four are None.

    Examples:
        >>> parse_video_uuid("b2e62c4a-6122-548c-9bfa-0fcf3613942a-8bit-yuv420p-high-V")
        {'base_uuid': 'b2e62c4a-6122-548c-9bfa-0fcf3613942a', 'bit_depth': 8, 'pix_fmt': 'yuv420p', 'profile': 'high'}
    """
    parts = uuid.split("-")
    result = {"base_uuid": None, "bit_depth": None, "pix_fmt": None, "profile": None}

    head = parts[0]
    if len(head) == 36 and set(head.lower()) <= _HEX_DIGITS:
        # Hash-style base: a single 36-hex-character group with no hyphens.
        base_len = 1
    elif len(parts) >= 5:
        # Standard base UUID (first 5 parts: xxxx-xxxx-xxxx-xxxx-xxxx)
        base_len = 5
    else:
        return result
    result["base_uuid"] = "-".join(parts[:base_len])

    # Parse parameter suffix
    for token in parts[base_len:]:
        depth_digits = token[:-3] if token.endswith("bit") else ""
        if depth_digits.isdecimal():
            result["bit_depth"] = int(depth_digits)
        elif token.startswith(_PIX_FMT_PREFIXES):
            result["pix_fmt"] = token
        elif token and token not in _TYPE_SUFFIXES:
            # Includes "...bit" tokens without a numeric prefix: they are not
            # bit depths, so they are kept as the profile rather than dropped.
            result["profile"] = token

    return result
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
if __name__ == "__main__":
    # Manual smoke test: build a few identifiers up front, then print them
    # and round-trip one video identifier through the parser.
    h264_high = generate_video_uuid("h264", 8, "yuv420p", "high")
    h264_high10 = generate_video_uuid("h264", 10, "yuv420p", "high10")
    hevc_main = generate_video_uuid("hevc", 8, "yuv420p", "main")
    hevc_main10 = generate_video_uuid("hevc", 10, "yuv420p", "main10")

    aac_48k = generate_audio_uuid("aac", 48000, "s16")
    aac_6k = generate_audio_uuid("aac", 6000, "s16")

    print("Video UUIDs:")
    print(f" H.264 8-bit 4:2:0 High: {h264_high}")
    print(f" H.264 10-bit 4:2:0 High10: {h264_high10}")
    print(f" HEVC 8-bit 4:2:0 Main: {hevc_main}")
    print(f" HEVC 10-bit 4:2:0 Main10: {hevc_main10}")

    print("\nAudio UUIDs:")
    print(f" AAC 48kHz 16-bit: {aac_48k}")
    print(f" AAC 6kHz 16-bit: {aac_6k}")

    print("\nParsing test:")
    sample_uuid = generate_video_uuid("h264", 10, "yuv420p", "high10")
    print(f" UUID: {sample_uuid}")
    print(f" Parsed: {parse_video_uuid(sample_uuid)}")
|