smart-media-manager 0.5.43a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,491 @@
1
+ """
2
+ Format Registry Module for Smart Media Manager.
3
+
4
+ Provides UUID-based format identification and compatibility checking
5
+ using the unified format registry system.
6
+ """
7
+
8
+ import importlib.resources as resources
9
+ import json
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional, Set
13
+
14
LOG = logging.getLogger(__name__)

# Module-level caches. Each starts as None ("not loaded yet") and is filled on
# first use: _REGISTRY by load_format_registry(), _COMPATIBILITY by
# load_compatibility_data(). Later calls return the cached object.
_REGISTRY: Optional[Dict[str, Any]] = None
_COMPATIBILITY: Optional[Dict[str, Any]] = None
19
+
20
+
21
+ def _merge_unique_list(target: list, additions: list) -> None:
22
+ existing = set(target)
23
+ for item in additions:
24
+ if item not in existing:
25
+ target.append(item)
26
+ existing.add(item)
27
+
28
+
29
+ def _merge_updates(base: Dict[str, Any], updates: Dict[str, Any]) -> None:
30
+ """Merge a compatibility/registry updates overlay into the base data.
31
+
32
+ The overlay format matches the auto-generated format_registry_updates_*.json
33
+ files produced during runs when unknown mappings are encountered.
34
+ """
35
+
36
+ # tool_mappings
37
+ for tool, mapping in updates.get("tool_mappings", {}).items():
38
+ base.setdefault("tool_mappings", {}).setdefault(tool, {}).update(mapping)
39
+
40
+ # format_names
41
+ for fmt_uuid, info in updates.get("format_names", {}).items():
42
+ base.setdefault("format_names", {}).setdefault(fmt_uuid, info)
43
+
44
+ # apple_photos_compatible
45
+ compat_updates = updates.get("apple_photos_compatible", {})
46
+ if compat_updates:
47
+ base_apc = base.setdefault("apple_photos_compatible", {})
48
+ for section, values in compat_updates.items(): # images/videos
49
+ base_section = base_apc.setdefault(section, {})
50
+ for key, val in values.items():
51
+ base_list = base_section.setdefault(key, [])
52
+ if isinstance(val, list):
53
+ _merge_unique_list(base_list, val)
54
+
55
+
56
+ def _load_update_files() -> list[Path]:
57
+ """Locate format_registry_updates_*.json overlays in likely locations."""
58
+ candidates: list[Path] = []
59
+ cwd = Path.cwd()
60
+ candidates.extend(sorted(cwd.glob("format_registry_updates_*.json")))
61
+
62
+ repo_root = Path(__file__).parent.parent
63
+ if repo_root != cwd:
64
+ candidates.extend(sorted(repo_root.glob("format_registry_updates_*.json")))
65
+
66
+ return [path for path in candidates if path.is_file()]
67
+
68
+
69
def load_format_registry() -> Dict[str, Any]:
    """Load the complete format registry from the packaged format_registry.json.

    The result is cached in the module-level ``_REGISTRY``; later calls return
    the cached object.  A failed load is logged and cached as an empty
    dictionary, so the resource is not re-read on every call.

    Returns:
        The parsed registry, or an empty dictionary when it could not be loaded.
    """
    global _REGISTRY
    if _REGISTRY is not None:
        return _REGISTRY

    try:
        resource_path = resources.files("smart_media_manager").joinpath("format_registry.json")
        with resource_path.open("r", encoding="utf-8") as handle:
            loaded = json.load(handle)
        # Callers use dict methods on the registry, so reject any other JSON
        # top-level type here with a clear message.
        if not isinstance(loaded, dict):
            raise TypeError(f"expected a JSON object at top level, got {type(loaded).__name__}")
        LOG.info(
            "Loaded format registry with %d format definitions (packaged resource)",
            len(loaded.get("format_names", {})),
        )
        _REGISTRY = loaded
        return _REGISTRY
    except FileNotFoundError:
        LOG.error("Packaged format registry not found.")
    except Exception as exc:  # pragma: no cover - corrupted install
        LOG.error("Failed to load packaged format registry: %s", exc)

    _REGISTRY = {}
    return _REGISTRY
90
+
91
+
92
def load_compatibility_data() -> Dict[str, Any]:
    """Load Apple Photos compatibility data from format_compatibility.json.

    The file is read from the directory that contains this module.  After
    loading, every overlay returned by ``_load_update_files()`` is merged in
    with ``_merge_updates()``; an overlay that cannot be read or merged is
    logged as a warning and skipped.  The result is cached in the module-level
    ``_COMPATIBILITY`` and returned directly on later calls.

    Returns:
        Dictionary containing compatibility rules

    Raises:
        FileNotFoundError: If format_compatibility.json does not exist.
        RuntimeError: If the file cannot be read or is not a JSON object.
    """
    global _COMPATIBILITY
    if _COMPATIBILITY is not None:
        return _COMPATIBILITY

    compat_path = Path(__file__).parent / "format_compatibility.json"
    if not compat_path.exists():
        LOG.error("FATAL: Compatibility data not found at %s", compat_path)
        LOG.error("This file must be included in the package. The installation is corrupted.")
        LOG.error("Please reinstall: uv tool uninstall smart-media-manager && uv tool install smart-media-manager")
        raise FileNotFoundError(f"Critical file missing: {compat_path}")

    try:
        # JSON is UTF-8; do not depend on the platform's default encoding.
        with compat_path.open("r", encoding="utf-8") as handle:
            data = json.load(handle)
        if not isinstance(data, dict):
            raise TypeError(f"expected a JSON object at top level, got {type(data).__name__}")
    except Exception as exc:
        LOG.error("FATAL: Failed to load compatibility data: %s", exc)
        raise RuntimeError(f"Failed to load critical compatibility data from {compat_path}") from exc
    LOG.info("Loaded Apple Photos compatibility rules")

    # Overlays are optional: failing to search for them must not make the
    # already loaded packaged data unusable.
    try:
        update_files = _load_update_files()
    except OSError as exc:
        LOG.warning("Could not search for compatibility update overlays: %s", exc)
        update_files = []

    # Merge any local update overlays (format_registry_updates_*.json)
    for update_file in update_files:
        try:
            with update_file.open("r", encoding="utf-8") as handle:
                updates = json.load(handle)
            _merge_updates(data, updates)
            LOG.info("Applied compatibility updates from %s", update_file)
        except Exception as exc:  # noqa: BLE001
            LOG.warning("Failed to apply updates from %s: %s", update_file, exc)

    # Publish to the cache only once loading has finished.
    _COMPATIBILITY = data
    return _COMPATIBILITY
128
+
129
+
130
def lookup_format_uuid(tool_name: str, tool_output: str) -> Optional[str]:
    """Look up format UUID from tool-specific output.

    An exact match of ``tool_output`` against the tool's mapping keys is tried
    first.  Otherwise the first key (in mapping order) that contains
    ``tool_output`` or is contained in it is used, which covers long
    descriptions such as "JPEG image data, JFIF standard...".  When a mapping
    value is a list, its first element is returned (``None`` for an empty list).

    Args:
        tool_name: Name of the detection tool (libmagic, puremagic, ffprobe, etc.)
        tool_output: The format string returned by the tool

    Returns:
        Format UUID with type suffix, or None if not found
    """

    def _first_uuid(value: Any) -> Optional[str]:
        # A mapping value is either a single UUID or a list of UUIDs.
        if isinstance(value, list):
            return value[0] if value else None
        return value

    compat = load_compatibility_data()
    mappings = compat.get("tool_mappings", {}).get(tool_name, {})

    # Direct lookup
    if tool_output in mappings:
        return _first_uuid(mappings[tool_output])

    # An empty string is a substring of every key, so without this guard the
    # partial match below would return whichever mapping happens to be first.
    if not tool_output:
        return None

    # Partial match for complex strings (e.g., "JPEG image data, JFIF standard...")
    for key, mapped in mappings.items():
        if not key:
            # An empty key would match every output for the same reason.
            continue
        if key in tool_output or tool_output in key:
            return _first_uuid(mapped)

    return None
159
+
160
+
161
def get_canonical_name(format_uuid: str) -> Optional[str]:
    """Return the canonical name stored for a format UUID.

    The name is read from the ``canonical`` field of the UUID's entry under
    ``format_names`` in the compatibility data.

    Args:
        format_uuid: Format UUID with type suffix

    Returns:
        Canonical format name, or None if not found
    """
    entry = load_compatibility_data().get("format_names", {}).get(format_uuid)
    return entry.get("canonical") if entry else None
175
+
176
+
177
def get_format_extensions(format_uuid: str) -> List[str]:
    """Get file extensions for a format UUID.

    The extensions are read from the ``extensions`` field of the UUID's entry
    under ``format_names`` in the compatibility data.  A new list is returned
    on every call, so callers may modify it without changing the cached data.

    Args:
        format_uuid: Format UUID with type suffix

    Returns:
        List of file extensions (with dots); empty when the UUID is unknown
        or has no extensions recorded
    """
    entry = load_compatibility_data().get("format_names", {}).get(format_uuid)
    if not entry:
        return []
    # ``or []`` also covers an explicit JSON null for "extensions".
    return list(entry.get("extensions") or [])
191
+
192
+
193
def is_apple_photos_compatible(format_uuid: str) -> bool:
    """Tell whether a format can be imported into Apple Photos as it is.

    The UUID is looked up, by exact membership, in the direct-import and RAW
    image lists and in the compatible container and video codec lists.

    Args:
        format_uuid: Format UUID with type suffix

    Returns:
        True if format can be directly imported to Apple Photos
    """
    apple_compat = load_compatibility_data().get("apple_photos_compatible", {})

    # (section, list name) pairs, checked in this order.
    accepted_lists = (
        ("images", "direct_import"),
        ("images", "raw_formats"),
        ("videos", "compatible_containers"),
        ("videos", "compatible_video_codecs"),
    )
    return any(
        format_uuid in apple_compat.get(section, {}).get(list_name, [])
        for section, list_name in accepted_lists
    )
222
+
223
+
224
def needs_conversion(format_uuid: str) -> bool:
    """Check if format needs conversion before Apple Photos import.

    A format needs conversion when it is listed under the image
    ``needs_conversion`` list, the video ``needs_rewrap`` or
    ``needs_transcode_container`` lists, or when it matches one of the
    ``needs_transcode_video`` patterns.  The pattern check uses
    ``_uuid_matches_pattern`` so that expanded UUIDs (with parameters such as
    bit depth) are recognised the same way ``get_format_action`` does.

    Args:
        format_uuid: Format UUID with type suffix

    Returns:
        True if format needs conversion
    """
    compat = load_compatibility_data()
    apple_compat = compat.get("apple_photos_compatible", {})
    images = apple_compat.get("images", {})
    videos = apple_compat.get("videos", {})

    if format_uuid in images.get("needs_conversion", []):
        return True

    if format_uuid in videos.get("needs_rewrap", []):
        return True

    # Containers that always need a full transcode also require conversion.
    if format_uuid in videos.get("needs_transcode_container", []):
        return True

    # Exact matches are covered too: a UUID always matches an identical pattern.
    return any(
        _uuid_matches_pattern(format_uuid, pattern)
        for pattern in videos.get("needs_transcode_video", [])
    )
247
+
248
+
249
def _audio_needs_transcode(audio_codec: Optional[str], videos: Dict[str, Any]) -> bool:
    """Return True when ``audio_codec`` matches a ``needs_transcode_audio`` pattern."""
    if not audio_codec:
        return False
    return any(
        _uuid_matches_pattern(audio_codec, pattern)
        for pattern in videos.get("needs_transcode_audio", [])
    )


def get_format_action(format_uuid: str, video_codec: Optional[str] = None, audio_codec: Optional[str] = None, container_uuid: Optional[str] = None) -> Optional[str]:
    """Determine the required action for a format based on Apple Photos compatibility.

    Supports both exact UUID matching and pattern-based matching for expanded UUIDs.
    For video codecs, checks format parameters (bit depth, pixel format, profile).
    For videos, checks BOTH container and video codec compatibility.

    Decision order:
        1. Directly importable and RAW image formats return "import".
        2. When ``container_uuid`` is given, the container and the video codec
           (``format_uuid``) are judged together and a result is always returned.
        3. Otherwise ``format_uuid`` is checked in turn as a compatible
           container, a compatible video codec, a transcode pattern, an image
           needing conversion, a container needing rewrap, and a container
           needing a full transcode.
        4. Finally the audio codec alone is checked.

    Args:
        format_uuid: Format UUID with type suffix (may include parameters like "8bit-yuv420p-high")
        video_codec: Video codec name (for videos)
        audio_codec: Audio codec name (for videos)
        container_uuid: Container UUID (for videos, to check container compatibility separately)

    Returns:
        Action string: "import", "rewrap_to_mp4", "transcode_to_hevc_mp4", "transcode_audio_to_supported", "convert_to_png", or None if unsupported
    """
    compat = load_compatibility_data()
    apple_compat = compat.get("apple_photos_compatible", {})
    images = apple_compat.get("images", {})

    # Check if directly compatible (exact match)
    if format_uuid in images.get("direct_import", []):
        return "import"

    if format_uuid in images.get("raw_formats", []):
        return "import"

    videos = apple_compat.get("videos", {})

    # For videos, check container AND codecs
    if container_uuid:
        container_compatible = container_uuid in videos.get("compatible_containers", [])
        container_needs_rewrap = container_uuid in videos.get("needs_rewrap", [])
        video_codec_compatible = format_uuid in videos.get("compatible_video_codecs", [])
        video_needs_transcode = any(
            _uuid_matches_pattern(format_uuid, pattern)
            for pattern in videos.get("needs_transcode_video", [])
        )

        if container_compatible:
            if video_needs_transcode:
                return "transcode_to_hevc_mp4"  # Video codec needs transcode
            if _audio_needs_transcode(audio_codec, videos):
                return "transcode_audio_to_supported"  # Audio codec needs transcode
            return "import"  # Container and codecs are compatible

        if container_needs_rewrap:
            if video_codec_compatible and not video_needs_transcode:
                return "rewrap_to_mp4"  # Container incompatible, but codec compatible
            return "transcode_to_hevc_mp4"  # Both container and codec incompatible

        # Container is neither compatible nor needs rewrap (unknown container)
        if video_codec_compatible:
            return "rewrap_to_mp4"  # Assume container needs rewrap if codec is compatible
        return "transcode_to_hevc_mp4"  # Full transcode needed

    # Legacy path for non-video or when container_uuid not provided
    if format_uuid in videos.get("compatible_containers", []):
        # Container is compatible, check codecs
        if video_codec and video_codec not in ["h264", "hevc", "av1"]:
            return "transcode_to_hevc_mp4"  # Need to transcode video codec
        if _audio_needs_transcode(audio_codec, videos):
            return "transcode_audio_to_supported"  # Need to transcode audio codec
        return "import"  # Container and codecs are compatible

    # Check video codec UUID (exact match first)
    if format_uuid in videos.get("compatible_video_codecs", []):
        return "import"  # Video codec is compatible

    # Pattern-based matching for video codecs with parameters
    # Check if UUID matches any transcode patterns (10-bit H.264, 4:2:2, 4:4:4)
    for transcode_pattern in videos.get("needs_transcode_video", []):
        if _uuid_matches_pattern(format_uuid, transcode_pattern):
            return "transcode_to_hevc_mp4"

    # Check if conversion needed
    if format_uuid in images.get("needs_conversion", []):
        return "convert_to_png"  # Convert incompatible image formats

    if format_uuid in videos.get("needs_rewrap", []):
        return "rewrap_to_mp4"  # Container incompatible, but codecs compatible

    if format_uuid in videos.get("needs_transcode_container", []):
        return "transcode_to_hevc_mp4"  # Container always needs full transcode (e.g., AVI)

    # Check audio codec separately using UUID matching
    if _audio_needs_transcode(audio_codec, videos):
        return "transcode_audio_to_supported"

    return None  # Unsupported format
367
+
368
+
369
+ def _uuid_matches_pattern(uuid: str, pattern: str) -> bool:
370
+ """Check if a UUID matches a pattern (for expanded UUIDs with wildcards).
371
+
372
+ Supports wildcard patterns like:
373
+ - "b2e62c4a-6122-548c-9bfa-0fcf3613942a-10bit-V" (matches any 10-bit H.264)
374
+ - "b2e62c4a-6122-548c-9bfa-0fcf3613942a-yuv422p-V" (matches any 4:2:2 H.264)
375
+
376
+ Args:
377
+ uuid: The UUID to check
378
+ pattern: The pattern to match against
379
+
380
+ Returns:
381
+ True if UUID matches the pattern
382
+ """
383
+ # Exact match
384
+ if uuid == pattern:
385
+ return True
386
+
387
+ # Extract base UUID (first 5 parts: xxxx-xxxx-xxxx-xxxx-xxxx)
388
+ uuid_parts = uuid.split("-")
389
+ pattern_parts = pattern.split("-")
390
+
391
+ if len(uuid_parts) < 5 or len(pattern_parts) < 5:
392
+ return False
393
+
394
+ # Base UUIDs must match
395
+ uuid_base = "-".join(uuid_parts[:5])
396
+ pattern_base = "-".join(pattern_parts[:5])
397
+
398
+ if uuid_base != pattern_base:
399
+ return False
400
+
401
+ # If pattern has no parameters, match any UUID with same base
402
+ if len(pattern_parts) == 6 and pattern_parts[5] in ["V", "A", "I", "C", "R"]:
403
+ return True
404
+
405
+ # Check if UUID contains the pattern's key parameters
406
+ uuid_params = set(uuid_parts[5:])
407
+ pattern_params = set(pattern_parts[5:])
408
+
409
+ # Pattern parameters must be present in UUID
410
+ return pattern_params.issubset(uuid_params)
411
+
412
+
413
def get_compatible_formats() -> Set[str]:
    """Collect every format UUID listed as Apple Photos compatible.

    The result is the union of the direct-import and RAW image lists and the
    compatible container and video codec lists.

    Returns:
        Set of format UUIDs that are compatible
    """
    apple_compat = load_compatibility_data().get("apple_photos_compatible", {})
    images = apple_compat.get("images", {})
    videos = apple_compat.get("videos", {})

    return {
        # Image formats
        *images.get("direct_import", []),
        *images.get("raw_formats", []),
        # Video formats
        *videos.get("compatible_containers", []),
        *videos.get("compatible_video_codecs", []),
    }
433
+
434
+
435
def get_incompatible_formats() -> Set[str]:
    """Get set of known incompatible format UUIDs.

    Every UUID under ``format_names`` is considered incompatible unless it is
    in the compatible set (see ``get_compatible_formats``) or in one of the
    lists of formats that can be processed before import: images needing
    conversion, videos needing rewrap, video transcode entries, and containers
    needing a full transcode.

    Returns:
        Set of format UUIDs that cannot be imported
    """
    # For now, anything not known as compatible or processable is treated as
    # potentially incompatible. This could be expanded with explicit
    # incompatible lists in the JSON.
    compat = load_compatibility_data()
    all_formats = set(compat.get("format_names", {}))

    apple_compat = compat.get("apple_photos_compatible", {})
    images = apple_compat.get("images", {})
    videos = apple_compat.get("videos", {})

    # Formats that need conversion count as "compatible" since we can process them.
    processable = set(images.get("needs_conversion", []))
    processable.update(videos.get("needs_rewrap", []))
    processable.update(videos.get("needs_transcode_video", []))
    # get_format_action() transcodes these containers, so they are processable too.
    processable.update(videos.get("needs_transcode_container", []))

    return all_formats - get_compatible_formats() - processable
456
+
457
+
458
def format_detection_result(tool_results: Dict[str, str]) -> Optional[str]:
    """Pick a format UUID by weighted vote across several detection tools.

    Each tool output is translated to a UUID with ``lookup_format_uuid``.
    Tools with empty output or no known mapping do not vote.  Every vote adds
    the tool's weight (1.0 for tools without a specific weight) to its UUID.

    Args:
        tool_results: Dictionary mapping tool names to their output strings

    Returns:
        Consensus format UUID, or None if no consensus
    """
    tool_weights = {
        "libmagic": 1.4,
        "puremagic": 1.1,
        "pyfsig": 1.0,
        "binwalk": 1.2,
        "ffprobe": 1.3,
    }

    tally: Dict[str, float] = {}
    for tool_name, tool_output in tool_results.items():
        if not tool_output:
            continue
        detected = lookup_format_uuid(tool_name, tool_output)
        if not detected:
            continue
        tally[detected] = tally.get(detected, 0.0) + tool_weights.get(tool_name, 1.0)

    if not tally:
        return None

    # Highest total wins; on a tie the UUID that was voted for first is kept.
    return max(tally, key=tally.get)