smart-media-manager 0.5.43a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,229 @@
1
+ """
2
+ Metadata Field Registry Module for Smart Media Manager.
3
+
4
+ Provides UUID-based metadata field identification for unified field recognition
5
+ across different tools and formats (ExifTool, FFmpeg, libmagic, etc.).
6
+
7
+ This complements the format UUID system by providing a translation layer for
8
+ metadata field names, enabling programmatic metadata operations across tools.
9
+ """
10
+
11
+ import json
12
+ import logging
13
+ from pathlib import Path
14
+ from typing import Any, Dict, List, Optional
15
+
16
LOG = logging.getLogger(__name__)

# Global registry cache: None until the first load attempt, afterwards the
# parsed registry (or an empty dict when the file was missing or invalid).
_METADATA_REGISTRY: Optional[Dict[str, Any]] = None


def load_metadata_registry() -> Dict[str, Any]:
    """Load the metadata field registry from metadata_registry.json.

    The file is looked up next to this module and parsed once; the result is
    cached in ``_METADATA_REGISTRY`` and returned as-is on every later call.
    A missing, unreadable or malformed file never raises: the problem is
    logged and an empty registry is cached instead.

    Returns:
        Dictionary containing all metadata field mappings with UUIDs, or an
        empty dictionary when the registry could not be loaded.
    """
    global _METADATA_REGISTRY
    if _METADATA_REGISTRY is not None:
        return _METADATA_REGISTRY

    registry_path = Path(__file__).parent / "metadata_registry.json"
    registry: Dict[str, Any] = {}
    try:
        # JSON is UTF-8 by specification; do not depend on the locale's
        # default encoding (which is not UTF-8 on every platform).
        with open(registry_path, encoding="utf-8") as f:
            loaded = json.load(f)
        if not isinstance(loaded, dict):
            raise TypeError(f"registry root must be a JSON object, got {type(loaded).__name__}")
        # Counting also validates that every category is a sized container.
        field_count = sum(len(fields) for fields in loaded.get("metadata_fields", {}).values())
    except FileNotFoundError:
        LOG.warning("Metadata registry not found at %s, using empty registry", registry_path)
    except Exception as exc:
        # Deliberate best-effort boundary: a broken registry must not crash callers.
        LOG.error("Failed to load metadata registry: %s", exc)
    else:
        LOG.info("Loaded metadata registry with %s field definitions", field_count)
        registry = loaded

    # Publish to the cache only after the data has been fully validated.
    _METADATA_REGISTRY = registry
    return registry
48
+
49
+
50
def lookup_metadata_field_uuid(tool_name: str, field_name: str) -> Optional[str]:
    """Look up metadata field UUID from tool-specific field name.

    Every category and field of the registry is scanned in order and the
    first field whose mapping for ``tool_name`` contains ``field_name``
    wins. Field names are matched exactly.

    Args:
        tool_name: Name of the tool (exiftool, ffprobe, ffmpeg, etc.)
        field_name: The field name as reported by the tool

    Returns:
        The matching field's ``uuid`` value as a string, or None if no field
        maps this name (or the matching field has no UUID).

    Example (actual values depend on the bundled registry file):
        >>> lookup_metadata_field_uuid("ffprobe", "creation_time")
        '3d4f8a9c-1e7b-5c3d-9a2f-4e8c1b7d3a9f-M'
    """
    registry = load_metadata_registry()
    metadata_fields = registry.get("metadata_fields", {})

    # Search through all categories and fields
    for category in metadata_fields.values():
        for field_info in category.values():
            names = field_info.get("tool_mappings", {}).get(tool_name)
            if names is None:
                continue
            if isinstance(names, str):
                # A bare string is a single field name. Without this, `in`
                # would do a substring test ("time" in "creation_time").
                names = [names]
            if field_name in names:
                uuid_value = field_info.get("uuid")
                # Ensure we return str or None, not Any
                return str(uuid_value) if uuid_value is not None else None

    return None
80
+
81
+
82
def get_canonical_field_name(field_uuid: str) -> Optional[str]:
    """Resolve a metadata field UUID to its canonical field name.

    Args:
        field_uuid: Metadata field UUID to search for.

    Returns:
        The ``canonical`` value (as a string) of the first registry field
        whose ``uuid`` equals ``field_uuid``; None when no field matches or
        the matching field has no canonical name.
    """
    categories = load_metadata_registry().get("metadata_fields", {}).values()
    # Lazy scan: stops at the first field carrying the requested UUID.
    matching_fields = (
        info
        for fields in categories
        for info in fields.values()
        if info.get("uuid") == field_uuid
    )
    hit = next(matching_fields, None)
    if hit is None:
        return None

    canonical_name = hit.get("canonical")
    return None if canonical_name is None else str(canonical_name)
106
+
107
+
108
def get_tool_field_names(field_uuid: str, tool_name: str) -> List[str]:
    """Return the field names a given tool uses for a metadata field UUID.

    Args:
        field_uuid: Metadata field UUID to search for.
        tool_name: Tool whose field names are wanted (exiftool, ffprobe, etc.)

    Returns:
        The names listed under ``tool_mappings[tool_name]`` of the first
        registry field with this UUID, each converted to ``str``. An empty
        list is returned when the UUID is unknown, the tool has no mapping,
        or the mapping is not a list.
    """
    categories = load_metadata_registry().get("metadata_fields", {})

    for fields in categories.values():
        for info in fields.values():
            if info.get("uuid") != field_uuid:
                continue
            # First field with this UUID decides the answer.
            mapped = info.get("tool_mappings", {}).get(tool_name, [])
            if not isinstance(mapped, list):
                return []
            return list(map(str, mapped))

    return []
134
+
135
+
136
def translate_field_name(source_tool: str, source_field: str, target_tool: str) -> Optional[str]:
    """Translate one tool's field name into another tool's name for the same field.

    The source name is first resolved to a metadata field UUID, then the
    target tool's names for that UUID are looked up.

    Args:
        source_tool: Tool that reported the field (exiftool, ffprobe, etc.)
        source_field: Field name as reported by the source tool
        target_tool: Tool to translate the field name for

    Returns:
        The first field name registered for the target tool, or None when
        the source field is unmapped or the target tool has no names for it.
    """
    shared_uuid = lookup_metadata_field_uuid(source_tool, source_field)
    if shared_uuid:
        candidates = get_tool_field_names(shared_uuid, target_tool)
        if candidates:
            return candidates[0]
    return None
161
+
162
+
163
def normalize_metadata_dict(tool_name: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Re-key a tool's metadata dictionary by metadata field UUID.

    Args:
        tool_name: Tool that produced the metadata (exiftool, ffprobe, etc.)
        metadata: Dictionary keyed by tool-specific field names

    Returns:
        New dictionary with the original values. Fields with a UUID mapping
        are keyed by that UUID; all others are kept under
        ``unmapped:<tool_name>:<field_name>``. When several input fields
        resolve to the same UUID, the one iterated last overwrites the
        earlier ones.
    """
    normalized: Dict[str, Any] = {}
    for field_name, value in metadata.items():
        key = lookup_metadata_field_uuid(tool_name, field_name)
        if not key:
            # Preserve unmapped fields under a namespaced version of the original name
            LOG.debug(f"No UUID mapping for {tool_name} field: {field_name}")
            key = f"unmapped:{tool_name}:{field_name}"
        normalized[key] = value

    return normalized
190
+
191
+
192
def get_field_description(field_uuid: str) -> Optional[str]:
    """Return the human-readable description stored for a metadata field.

    Args:
        field_uuid: Metadata field UUID to search for.

    Returns:
        The ``description`` value (as a string) of the first registry field
        whose ``uuid`` equals ``field_uuid``; None when no field matches or
        the matching field has no description.
    """
    all_categories = load_metadata_registry().get("metadata_fields", {})

    for fields in all_categories.values():
        for info in fields.values():
            if info.get("uuid") != field_uuid:
                continue
            text = info.get("description")
            if text is None:
                return None
            return str(text)

    return None
212
+
213
+
214
def get_all_field_uuids() -> List[str]:
    """Collect the UUID of every field in the registry.

    Returns:
        The ``uuid`` values of all fields that define one, in registry
        iteration order (category by category). Values are returned exactly
        as stored in the registry.
    """
    categories = load_metadata_registry().get("metadata_fields", {})
    return [
        info["uuid"]
        for fields in categories.values()
        for info in fields.values()
        if "uuid" in info
    ]
@@ -0,0 +1,140 @@
1
+ """UUID generator for format identification with parameters.
2
+
3
+ Generates deterministic UUIDs based on format parameters:
4
+ - Codec (h264, hevc, vp9, etc.)
5
+ - Bit depth (8, 10, 12, 16)
6
+ - Pixel format (yuv420p, yuv422p, yuv444p, rgb24, etc.)
7
+ - Profile (high, main, main10, etc.)
8
+ - Sample rate (for audio)
9
+ - Sample format (for audio)
10
+ """
11
+
12
+ import hashlib
13
+ from typing import Optional
14
+
15
+
16
def generate_video_uuid(codec: str, bit_depth: Optional[int] = None, pix_fmt: Optional[str] = None, profile: Optional[str] = None) -> str:
    """Generate a deterministic UUID for video format.

    The codec name is matched case-insensitively. Known codecs use a fixed
    base UUID; any other codec uses the first 36 hex characters of the
    SHA-256 digest of its lower-cased name (note: that fallback base
    contains no dashes).

    Args:
        codec: Codec name (h264, hevc, vp9, av1, etc.)
        bit_depth: Bit depth (8, 10, 12, 16); omitted from the ID when falsy
        pix_fmt: Pixel format (yuv420p, yuv422p, yuv444p, rgb24, etc.);
            used verbatim, omitted when falsy
        profile: Codec profile (high, main, main10, etc.); lower-cased,
            omitted when falsy

    Returns:
        ``{base}-V`` when no parameter is given, otherwise
        ``{base}[-{bitdepth}bit][-{pixfmt}][-{profile}]-V``.

    Examples:
        >>> generate_video_uuid("h264", 8, "yuv420p", "high")
        'b2e62c4a-6122-548c-9bfa-0fcf3613942a-8bit-yuv420p-high-V'
        >>> generate_video_uuid("h264")
        'b2e62c4a-6122-548c-9bfa-0fcf3613942a-V'
    """
    # Use base codec UUID (for backward compatibility with existing UUIDs)
    base_uuids = {
        "h264": "b2e62c4a-6122-548c-9bfa-0fcf3613942a",
        "hevc": "faf4b553-de47-5bc8-80ea-d026a2571456",
        "av1": "c69693cd-1fcd-5608-a8df-9476a00cfa9b",
        "vp9": "4c9b19a7-ec9f-57c2-98ca-3ac8432b27cc",
    }

    # Normalize once so the table lookup and the hash fallback agree on case;
    # otherwise "MPEG4" and "mpeg4" would receive different identifiers.
    codec_key = codec.lower()
    base_uuid = base_uuids.get(codec_key)
    if not base_uuid:
        # Generate deterministic base for unknown codec
        base_uuid = hashlib.sha256(codec_key.encode()).hexdigest()[:36]

    # Only parameters that were actually provided become part of the ID; with
    # none provided the result is the bare "{base}-V" (backward compatibility).
    params = []
    if bit_depth:
        params.append(f"{bit_depth}bit")
    if pix_fmt:
        params.append(pix_fmt)
    if profile:
        params.append(profile.lower())

    return "-".join([base_uuid, *params, "V"])
60
+
61
+
62
def generate_audio_uuid(codec: str, sample_rate: Optional[int] = None, sample_fmt: Optional[str] = None) -> str:
    """Build a deterministic identifier for an audio format.

    Args:
        codec: Codec name (aac, opus, vorbis, etc.); lower-cased in the ID
        sample_rate: Sample rate in Hz (44100, 48000, etc.); omitted when falsy
        sample_fmt: Sample format (s16, s24, s32, f32, etc.); used verbatim,
            omitted when falsy

    Returns:
        Dash-joined string ``{codec}[-{samplerate}][-{samplefmt}]-A``.

    Examples:
        >>> generate_audio_uuid("aac", 48000, "s16")
        'aac-48000-s16-A'
    """
    segments = [codec.lower()]
    segments += [str(sample_rate)] if sample_rate else []
    segments += [sample_fmt] if sample_fmt else []
    # "A" is appended as the final segment of every audio identifier.
    segments.append("A")
    return "-".join(segments)
84
+
85
+
86
def parse_video_uuid(uuid: str) -> dict:
    """Parse a video UUID to extract parameters.

    Two base layouts are recognised: the dashed five-group form
    (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``) and a single group of 36
    lower-case hex characters with no dashes (the form produced by hashing
    an unknown codec name). Everything after the base is treated as
    dash-separated parameters.

    Args:
        uuid: Video UUID string

    Returns:
        Dict with keys: base_uuid, bit_depth, pix_fmt, profile. Keys that
        cannot be determined are None; if no base can be identified all
        values are None.

    Examples:
        >>> parse_video_uuid("b2e62c4a-6122-548c-9bfa-0fcf3613942a-8bit-yuv420p-high-V")
        {'base_uuid': 'b2e62c4a-6122-548c-9bfa-0fcf3613942a', 'bit_depth': 8, 'pix_fmt': 'yuv420p', 'profile': 'high'}
    """
    parts = uuid.split("-")

    result = {"base_uuid": None, "bit_depth": None, "pix_fmt": None, "profile": None}

    head = parts[0]
    if len(head) == 36 and all(ch in "0123456789abcdef" for ch in head):
        # Undashed hash base: a standard UUID's first group is only 8
        # characters long, so the two layouts cannot be confused.
        base_len = 1
    elif len(parts) >= 5:
        # Standard base UUID (first 5 parts: xxxx-xxxx-xxxx-xxxx-xxxx)
        base_len = 5
    else:
        return result

    result["base_uuid"] = "-".join(parts[:base_len])

    # Pixel-format families recognised by prefix; anything else that is not
    # a bit depth or a type suffix is taken to be the profile.
    pix_fmt_prefixes = ("yuv", "rgb", "bgr", "gbr", "gray", "nv")

    # Parse parameter suffix
    for param in parts[base_len:]:
        if param.endswith("bit"):
            try:
                result["bit_depth"] = int(param[:-3])
            except ValueError:
                # Not "<number>bit"; the segment is ignored, as before.
                pass
        elif param.startswith(pix_fmt_prefixes):
            result["pix_fmt"] = param
        elif param not in ("V", "A", "I", "C", "R"):
            # Single-letter type suffixes are skipped; the rest is the profile.
            result["profile"] = param

    return result
123
+
124
+
125
if __name__ == "__main__":
    # Manual smoke test: print a few generated IDs and one parse round-trip.
    video_samples = (
        ("H.264 8-bit 4:2:0 High", ("h264", 8, "yuv420p", "high")),
        ("H.264 10-bit 4:2:0 High10", ("h264", 10, "yuv420p", "high10")),
        ("HEVC 8-bit 4:2:0 Main", ("hevc", 8, "yuv420p", "main")),
        ("HEVC 10-bit 4:2:0 Main10", ("hevc", 10, "yuv420p", "main10")),
    )
    audio_samples = (
        ("AAC 48kHz 16-bit", ("aac", 48000, "s16")),
        ("AAC 6kHz 16-bit", ("aac", 6000, "s16")),
    )

    print("Video UUIDs:")
    for label, args in video_samples:
        print(f" {label}: {generate_video_uuid(*args)}")

    print("\nAudio UUIDs:")
    for label, args in audio_samples:
        print(f" {label}: {generate_audio_uuid(*args)}")

    print("\nParsing test:")
    sample_uuid = generate_video_uuid("h264", 10, "yuv420p", "high10")
    print(f" UUID: {sample_uuid}")
    print(f" Parsed: {parse_video_uuid(sample_uuid)}")