smart-media-manager 0.5.43a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4941 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import datetime as dt
5
+ import logging
6
+ import math
7
+ import mimetypes
8
+ import os
9
+ import re
10
+ import shutil
11
+ import subprocess
12
+ import sys
13
+ import tempfile
14
+ import time
15
+ import unicodedata
16
+ import uuid
17
+ from collections import Counter
18
+ from contextlib import suppress
19
+ from dataclasses import dataclass, field
20
+ from pathlib import Path
21
+ from typing import Any, Callable, Iterable, Optional
22
+
23
+ import json
24
+
25
+ import filetype # type: ignore[import-untyped]
26
+ import puremagic
27
+ from PIL import Image
28
+ from isbinary import is_binary_file
29
+ from smart_media_manager import __version__
30
+ from smart_media_manager.format_rules import FormatRule, match_rule
31
+ from smart_media_manager import format_registry
32
+ from smart_media_manager import metadata_registry
33
+ from pyfsig import interface as pyfsig_interface # type: ignore[import-untyped]
34
+ import rawpy # type: ignore[import-untyped]
35
+
36
+ # python-magic requires libmagic system library (installed via Homebrew during bootstrap)
37
+ # Must be lazy-loaded so script can start and run bootstrap code
38
+ try:
39
+ import magic
40
+ except ImportError: # pragma: no cover - system dependency
41
+ magic = None # type: ignore[assignment]
42
+
43
# Module-wide logger; a file handler may be attached at runtime (see _FILE_LOG_HANDLER).
LOG = logging.getLogger("smart_media_manager")
# Set when file logging is configured elsewhere; _log_directory() reads it back.
_FILE_LOG_HANDLER: Optional[logging.Handler] = None
SMM_LOGS_SUBDIR = ".smm__runtime_logs_"  # Unique prefix for timestamped log directories (created in CWD, excluded from scanning)
46
+
47
+
48
def _log_directory() -> Optional[Path]:
    """Return the directory holding the active file-log, or None when no
    file handler has been configured yet."""
    handler = _FILE_LOG_HANDLER
    if handler is None:
        return None
    base_filename = getattr(handler, "baseFilename", None)
    if not base_filename:
        return None
    return Path(base_filename).resolve().parent
55
+
56
+
57
# Any character outside this class is presumably scrubbed when building "safe" names — confirm against the sanitizer.
SAFE_NAME_PATTERN = re.compile(r"[^A-Za-z0-9_.-]")
MAX_APPLESCRIPT_CHARS = 20000  # Max characters for AppleScript arguments
MAX_SAFE_STEM_LENGTH = 120  # Max length for safe filename stems
MAX_PHOTOS_FILENAME_LENGTH = 60  # Apple Photos filename limit (including extension)
APPLE_PHOTOS_FOLDER_IMPORT_TIMEOUT = 1800  # seconds (30 min) - timeout for single folder import of large collections

# Staging token marker and the pattern that extracts the token back out — NOTE(review): usage not visible here.
STAGING_TOKEN_PREFIX = "__SMM"
STAGING_TOKEN_PATTERN = re.compile(r"SMM([A-Za-z0-9]+)")
# Sentinel object: distinguishes "never specified" from an explicit None/number.
MAX_IMAGE_PIXELS_UNSET = object()

# Namespace used to generate deterministic UUIDs for previously unknown mappings
UNKNOWN_UUID_NAMESPACE = uuid.UUID("9a3e9b14-25f0-4e37-bc8e-cc3ad0e59bce")
69
+
70
BINWALK_EXECUTABLE = shutil.which("binwalk")  # None when binwalk is not on PATH

# Cached libmagic handles — presumably initialized lazily elsewhere (magic may be None; see the import guard above).
_MAGIC_MIME = None
_MAGIC_DESC = None

# Detection tools ordered most-trusted first; the list index is the rank used by tool_rank().
TOOL_PRIORITY = [
    "libmagic",
    "binwalk",
    "puremagic",
    "pyfsig",
]

# Relative vote weights per tool (tools not listed default to 1.0 in vote_weight()).
TOOL_WEIGHTS = {
    "libmagic": 1.4,
    "binwalk": 1.2,
    "puremagic": 1.1,
    "pyfsig": 1.0,
}
88
+
89
+ RAW_DEPENDENCY_GROUPS = {
90
+ "canon": {
91
+ "extensions": {".crw", ".cr2", ".cr3", ".crm", ".crx"},
92
+ "brew": ["libraw"],
93
+ "pip": ["rawpy"],
94
+ "cask": ["adobe-dng-converter"],
95
+ },
96
+ "nikon": {
97
+ "extensions": {".nef", ".nrw"},
98
+ "brew": ["libraw"],
99
+ "pip": ["rawpy"],
100
+ "cask": [],
101
+ },
102
+ "sony": {
103
+ "extensions": {".arw", ".srf", ".sr2"},
104
+ "brew": ["libraw"],
105
+ "pip": ["rawpy"],
106
+ "cask": [],
107
+ },
108
+ "fujifilm": {
109
+ "extensions": {".raf"},
110
+ "brew": ["libraw"],
111
+ "pip": ["rawpy"],
112
+ "cask": [],
113
+ },
114
+ "olympus": {
115
+ "extensions": {".orf"},
116
+ "brew": ["libraw"],
117
+ "pip": ["rawpy"],
118
+ "cask": [],
119
+ },
120
+ "panasonic": {
121
+ "extensions": {".rw2", ".raw"},
122
+ "brew": ["libraw"],
123
+ "pip": ["rawpy"],
124
+ "cask": [],
125
+ },
126
+ "pentax": {
127
+ "extensions": {".pef", ".dng"},
128
+ "brew": ["libraw"],
129
+ "pip": ["rawpy"],
130
+ "cask": [],
131
+ },
132
+ "leica": {
133
+ "extensions": {".dng", ".rwl"},
134
+ "brew": ["libraw"],
135
+ "pip": ["rawpy"],
136
+ "cask": ["adobe-dng-converter"],
137
+ },
138
+ "phaseone": {
139
+ "extensions": {".iiq", ".cap"},
140
+ "brew": ["libraw"],
141
+ "pip": ["rawpy"],
142
+ "cask": ["adobe-dng-converter"],
143
+ },
144
+ "hasselblad": {
145
+ "extensions": {".3fr", ".fff"},
146
+ "brew": ["libraw"],
147
+ "pip": ["rawpy"],
148
+ "cask": ["adobe-dng-converter"],
149
+ },
150
+ "sigma": {
151
+ "extensions": {".x3f"},
152
+ "brew": ["libraw", "libopenraw"],
153
+ "pip": ["rawpy"],
154
+ "cask": [],
155
+ },
156
+ "gopro": {
157
+ "extensions": {".gpr"},
158
+ "brew": ["libraw"],
159
+ "pip": ["rawpy"],
160
+ "cask": [],
161
+ },
162
+ "dji": {
163
+ "extensions": {".dng"},
164
+ "brew": ["libraw"],
165
+ "pip": ["rawpy"],
166
+ "cask": [],
167
+ },
168
+ }
169
+
170
# Reverse index over RAW_DEPENDENCY_GROUPS: lowercase extension -> group names.
RAW_EXTENSION_TO_GROUPS: dict[str, set[str]] = {}
for _raw_group, _raw_config in RAW_DEPENDENCY_GROUPS.items():
    for _raw_ext in _raw_config["extensions"]:
        RAW_EXTENSION_TO_GROUPS.setdefault(_raw_ext.lower(), set()).add(_raw_group)
175
+
176
# Memoized Homebrew executable path — presumably populated by ensure_homebrew(); TODO confirm.
_BREW_PATH_CACHE: Optional[str] = None
# pip packages already handled this run — NOTE(review): usage not visible in this chunk; verify against installers.
_PIP_PACKAGE_CACHE: set[str] = set()
# RAW dependency groups already installed this run (see install_raw_dependency_groups()).
_INSTALLED_RAW_GROUPS: set[str] = set()

# Required Homebrew packages: tool/library name -> Homebrew formula name.
REQUIRED_BREW_PACKAGES = {
    "ffmpeg": "ffmpeg",
    "libjxl": "jpeg-xl",
    "libheif": "libheif",
    "imagemagick": "imagemagick",
    "webp": "webp",
    "exiftool": "exiftool",
}
188
+
189
+ IMAGE_EXTENSION_MAP = {
190
+ "jpeg": ".jpg",
191
+ "jpg": ".jpg",
192
+ "png": ".png",
193
+ "tiff": ".tiff",
194
+ "tif": ".tiff",
195
+ "gif": ".gif",
196
+ "bmp": ".bmp",
197
+ "webp": ".webp",
198
+ "heic": ".heic",
199
+ "heif": ".heic",
200
+ }
201
+
202
+ COMPATIBLE_IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".heic", ".tiff", ".gif"}
203
+ COMPATIBLE_VIDEO_CONTAINERS = {"mp4", "mov", "quicktime", "m4v"}
204
+ COMPATIBLE_VIDEO_CODECS = {
205
+ # H.264 / AVC
206
+ "h264",
207
+ "avc1",
208
+ # HEVC / H.265
209
+ "hevc",
210
+ "h265",
211
+ "hvc1",
212
+ # Apple ProRes Family (all variants supported by Photos)
213
+ "apco", # ProRes 422 Proxy
214
+ "apcs", # ProRes 422 LT
215
+ "apcn", # ProRes 422
216
+ "apch", # ProRes 422 HQ
217
+ "ap4h", # ProRes 4444
218
+ "ap4x", # ProRes 4444 XQ
219
+ # Note: ProRes RAW cannot be imported (requires Final Cut Pro)
220
+ }
221
+ COMPATIBLE_AUDIO_CODECS = {
222
+ "aac",
223
+ "mp3",
224
+ "alac",
225
+ "pcm_s16le",
226
+ "pcm_s24le",
227
+ "pcm_s16be",
228
+ "pcm_f32le",
229
+ "ac3",
230
+ "eac3",
231
+ }
232
+
233
+ ARCHIVE_EXTENSIONS = {
234
+ # Standard archives
235
+ "zip",
236
+ "rar",
237
+ "7z",
238
+ "tar",
239
+ "gz",
240
+ "bz2",
241
+ "xz",
242
+ "lz",
243
+ "lzma",
244
+ "zst", # Zstandard (used by Homebrew, etc.)
245
+ "zstd",
246
+ "cab",
247
+ "iso",
248
+ "tgz",
249
+ "tbz2",
250
+ "txz",
251
+ "cpio",
252
+ "sit", # StuffIt
253
+ "sitx",
254
+ # macOS packages/disk images
255
+ "dmg",
256
+ "pkg", # macOS installer package (XAR archive)
257
+ "xar", # eXtensible ARchive format
258
+ "mpkg", # macOS meta-package
259
+ "sparseimage",
260
+ "sparsebundle",
261
+ # Linux packages
262
+ "deb",
263
+ "rpm",
264
+ # Windows packages
265
+ "msi",
266
+ "msix",
267
+ "appx",
268
+ # Java/Android
269
+ "apk",
270
+ "jar",
271
+ "war",
272
+ "ear",
273
+ "aar", # Android library
274
+ # Browser extensions
275
+ "xpi", # Firefox/Mozilla extension
276
+ "crx", # Chrome extension
277
+ # Application packages (zip-based)
278
+ "apkg", # Anki flashcard package
279
+ "sketch", # Sketch design files
280
+ "figma",
281
+ # Office documents (zip-based XML)
282
+ "docx",
283
+ "xlsx",
284
+ "pptx",
285
+ "odt",
286
+ "ods",
287
+ "odp",
288
+ "odg",
289
+ # Ebooks
290
+ "epub",
291
+ "mobi",
292
+ "azw",
293
+ "azw3",
294
+ # ML/AI model files
295
+ "safetensors",
296
+ "gguf", # llama.cpp models
297
+ "onnx",
298
+ # Virtual disk images
299
+ "vhd",
300
+ "vhdx",
301
+ "vmdk",
302
+ "qcow2",
303
+ # Fonts
304
+ "ttf",
305
+ "otf",
306
+ "woff",
307
+ "woff2",
308
+ "eot",
309
+ "ttc", # TrueType Collection
310
+ # Executables (not archives but should skip)
311
+ "exe",
312
+ "dll",
313
+ "so",
314
+ "dylib",
315
+ # Documents
316
+ "pdf",
317
+ "rtf",
318
+ "doc", # Legacy Word
319
+ "xls", # Legacy Excel
320
+ "ppt", # Legacy PowerPoint
321
+ # Icon files (not importable into Photos)
322
+ "icns", # macOS icon
323
+ "ico", # Windows icon
324
+ "cur", # Windows cursor
325
+ "ani", # Windows animated cursor
326
+ }
327
+
328
+ ARCHIVE_MIME_TYPES = {
329
+ # Standard archives
330
+ "application/zip",
331
+ "application/x-zip-compressed",
332
+ "application/x-7z-compressed",
333
+ "application/x-tar",
334
+ "application/x-rar",
335
+ "application/x-rar-compressed",
336
+ "application/vnd.rar",
337
+ "application/gzip",
338
+ "application/x-gzip",
339
+ "application/x-bzip2",
340
+ "application/x-xz",
341
+ "application/x-lzip",
342
+ "application/x-lzma",
343
+ "application/zstd",
344
+ "application/x-cpio",
345
+ "application/x-stuffit",
346
+ "application/x-stuffitx",
347
+ # Disk images
348
+ "application/x-iso9660-image",
349
+ "application/x-apple-diskimage",
350
+ # Packages
351
+ "application/x-xar", # macOS XAR archive (.pkg, .xar)
352
+ "application/vnd.android.package-archive",
353
+ "application/java-archive",
354
+ "application/x-debian-package",
355
+ "application/x-rpm",
356
+ "application/x-msi",
357
+ # Office documents
358
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
359
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
360
+ "application/vnd.openxmlformats-officedocument.presentationml.presentation",
361
+ "application/vnd.oasis.opendocument.text",
362
+ "application/vnd.oasis.opendocument.spreadsheet",
363
+ "application/vnd.oasis.opendocument.presentation",
364
+ # Ebooks
365
+ "application/epub+zip",
366
+ "application/x-mobipocket-ebook",
367
+ # Fonts
368
+ "font/otf",
369
+ "font/ttf",
370
+ "font/woff",
371
+ "font/woff2",
372
+ "application/font-sfnt",
373
+ "application/x-font-ttf",
374
+ "application/x-font-otf",
375
+ # Documents
376
+ "application/pdf",
377
+ "application/rtf",
378
+ "application/msword",
379
+ "application/vnd.ms-excel",
380
+ "application/vnd.ms-powerpoint",
381
+ # Executables
382
+ "application/x-msdownload",
383
+ "application/x-executable",
384
+ "application/x-mach-binary",
385
+ "application/x-sharedlib",
386
+ # Icon files (not importable into Photos)
387
+ "image/x-icon", # Windows .ico
388
+ "image/vnd.microsoft.icon", # Windows .ico (alternative)
389
+ "image/x-icns", # macOS .icns
390
+ "application/x-icns", # macOS .icns (alternative)
391
+ "image/x-win-bitmap", # Windows cursor
392
+ }
393
+
394
+ NON_MEDIA_REASON_KEYWORDS = (
395
+ "archive",
396
+ "unsupported format",
397
+ "format not identified",
398
+ "non-media",
399
+ "uuid detection failed",
400
+ "rawpy unsupported",
401
+ "document",
402
+ "pdf",
403
+ "installer",
404
+ "binary check failed",
405
+ "icon", # Icon files (.icns, .ico, .cur, .ani)
406
+ "cursor", # Cursor files
407
+ )
408
+
409
+ TEXTUAL_MIME_HINTS = {
410
+ "application/x-typescript",
411
+ "application/javascript",
412
+ "application/x-javascript",
413
+ "application/json",
414
+ "application/xml",
415
+ "text/javascript",
416
+ "text/typescript",
417
+ "text/x-python",
418
+ "text/x-shellscript",
419
+ "text/x-c",
420
+ "text/x-c++",
421
+ "text/x-go",
422
+ "text/x-ruby",
423
+ "text/x-php",
424
+ "text/markdown",
425
+ "text/plain",
426
+ }
427
+
428
+ TEXT_ONLY_HINT_EXTENSIONS = {
429
+ ".ts",
430
+ ".tsx",
431
+ ".js",
432
+ ".jsx",
433
+ ".mjs",
434
+ ".cjs",
435
+ ".py",
436
+ ".pyw",
437
+ ".java",
438
+ ".cs",
439
+ ".c",
440
+ ".cc",
441
+ ".cpp",
442
+ ".h",
443
+ ".hpp",
444
+ ".go",
445
+ ".rs",
446
+ ".rb",
447
+ ".php",
448
+ ".sh",
449
+ ".bash",
450
+ ".zsh",
451
+ ".ps1",
452
+ ".bat",
453
+ ".sql",
454
+ ".swift",
455
+ ".kt",
456
+ ".json",
457
+ ".yml",
458
+ ".yaml",
459
+ ".toml",
460
+ ".ini",
461
+ ".cfg",
462
+ ".conf",
463
+ ".md",
464
+ ".rst",
465
+ ".txt",
466
+ ".log",
467
+ }
468
+
469
+ VIDEO_EXTENSION_MAP = {
470
+ "mp4": ".mp4",
471
+ "m4v": ".m4v",
472
+ "mov": ".mov",
473
+ "qt": ".mov",
474
+ "avi": ".avi",
475
+ "mkv": ".mkv",
476
+ "webm": ".webm",
477
+ "flv": ".flv",
478
+ "wmv": ".wmv",
479
+ "mpg": ".mpg",
480
+ "mpeg": ".mpg",
481
+ "3gp": ".3gp",
482
+ "3g2": ".3g2",
483
+ "ts": ".ts",
484
+ "m2ts": ".ts",
485
+ "mts": ".ts",
486
+ }
487
+
488
+ VIDEO_EXTENSION_HINTS = set(VIDEO_EXTENSION_MAP.keys())
489
+
490
+ VIDEO_MIME_EXTENSION_MAP = {
491
+ "video/mp4": ".mp4",
492
+ "video/x-m4v": ".m4v",
493
+ "video/quicktime": ".mov",
494
+ "video/x-quicktime": ".mov",
495
+ "video/x-msvideo": ".avi",
496
+ "video/x-matroska": ".mkv",
497
+ "video/webm": ".webm",
498
+ "video/x-flv": ".flv",
499
+ "video/x-ms-wmv": ".wmv",
500
+ "video/mpeg": ".mpg",
501
+ "video/MP2T": ".ts",
502
+ "video/3gpp": ".3gp",
503
+ "video/3gpp2": ".3g2",
504
+ }
505
+
506
+ IMAGE_MIME_EXTENSION_MAP = {
507
+ "image/jpeg": ".jpg",
508
+ "image/png": ".png",
509
+ "image/tiff": ".tiff",
510
+ "image/gif": ".gif",
511
+ "image/bmp": ".bmp",
512
+ "image/webp": ".webp",
513
+ "image/heif": ".heic",
514
+ "image/heic": ".heic",
515
+ }
516
+
517
+ ALL_IMAGE_EXTENSIONS = set(IMAGE_EXTENSION_MAP.keys())
518
+
519
+
520
@dataclass
class MediaFile:
    """Everything learned about one candidate media file.

    Carries detection results, conversion planning, and import bookkeeping
    for a single source file as it moves through the pipeline.
    """

    source: Path  # Original file location on disk
    kind: str  # Broad category (e.g. "raw" for camera RAW; see refine_raw_media)
    extension: str  # Chosen target extension, with leading dot
    format_name: str  # Human-readable format / camera description
    stage_path: Optional[Path] = None  # Location once staged, if staged
    compatible: bool = False  # Current Apple Photos compatibility (may change after conversion)
    video_codec: Optional[str] = None  # Detected video codec, videos only
    audio_codec: Optional[str] = None  # Detected audio codec, videos only
    audio_sample_rate: Optional[int] = None  # Audio sample rate — presumably Hz; confirm at probe site
    audio_sample_fmt: Optional[str] = None  # Audio sample format name, videos only
    original_suffix: str = ""  # Suffix the source file carried before normalization
    rule_id: str = ""  # Identifier of the matched format rule, if any
    action: str = "import"  # Planned action for this file
    requires_processing: bool = False  # True when conversion/rewrap is required
    was_converted: bool = False  # Tracks if file was actually converted (for stats)
    notes: str = ""  # Free-form diagnostic notes
    metadata: dict[str, Any] = field(default_factory=dict)  # Tool-specific extras (e.g. psd_color_mode)
    detected_compatible: bool = False  # Detection-time compatibility prior to conversions
540
+
541
+
542
@dataclass
class SkipLogger:
    """Appends skipped-file records to a TSV log file.

    Non-media skips are silently dropped so the log stays small; everything
    else is appended as ``<path>\\t<reason>`` and counted.
    """

    path: Path  # Destination log file (opened in append mode per entry)
    entries: int = 0  # Number of records actually written

    def log(self, file_path: Path, reason: str) -> None:
        """Record one skipped file unless the reason marks it as non-media."""
        # Non-media files are silently ignored to avoid gigantic logs.
        if reason.lower().startswith("non-media"):
            return
        # File-only logging keeps the console free of skip noise.
        LOG.debug("Skipping %s (%s)", file_path, reason)
        with self.path.open("a", encoding="utf-8") as handle:
            handle.write(f"{file_path}\t{reason}\n")
        self.entries += 1

    def has_entries(self) -> bool:
        """True once at least one record has been written."""
        return self.entries > 0
560
+
561
+
562
class UnknownMappingCollector:
    """Collects missing format UUID mappings and emits an update JSON.

    The collector keeps only one sample per (tool, token, kind) triple
    to avoid bloating memory when thousands of files share the same
    missing mapping.
    """

    def __init__(self) -> None:
        # Keyed by (tool, token, kind); value is one sample file path (as str).
        self._entries: dict[tuple[str, str, str], str] = {}

    def register(self, tool: str, token: str, kind: str, sample: Path) -> None:
        """Record one missing mapping; only the first sample per key is kept."""
        key = (tool, token, kind)
        if key not in self._entries:
            self._entries[key] = str(sample)
            LOG.info("Captured missing UUID mapping: %s -> %s (%s)", tool, token, kind)

    def has_entries(self) -> bool:
        """True when at least one missing mapping has been registered."""
        return bool(self._entries)

    def _generated_uuid(self, token: str, kind: str) -> str:
        """Deterministic UUID string for (kind, token), tagged with a kind suffix."""
        # Single-letter suffix encodes the media kind; "U" marks an unknown kind.
        suffix = {
            "video": "V",
            "audio": "A",
            "image": "I",
            "container": "C",
        }.get(kind, "U")
        # uuid5 over a fixed namespace keeps the identifier stable across runs.
        base = uuid.uuid5(UNKNOWN_UUID_NAMESPACE, f"{kind}:{token}")
        return f"{base}-{suffix}"

    def write_updates(self, output_dir: Path) -> Optional[Path]:
        """Write all collected mappings as a registry-update JSON in *output_dir*.

        Returns the written path, or None when there is nothing to write or
        the write failed (failure is logged, never raised).
        """
        if not self._entries:
            return None

        # Skeleton mirroring the format-registry layout the update is merged into.
        update: dict[str, Any] = {
            "format_names": {},
            "tool_mappings": {},
            "apple_photos_compatible": {
                "images": {"needs_conversion": []},
                "videos": {
                    "needs_rewrap": [],
                    "needs_transcode_video": [],
                    "needs_transcode_audio": [],
                    "compatible_containers": [],
                    "compatible_video_codecs": [],
                },
            },
            "generated_from": "smart-media-manager auto-run",
        }

        for (tool, token, kind), sample in sorted(self._entries.items()):
            mapped_uuid = self._generated_uuid(token, kind)
            update.setdefault("tool_mappings", {}).setdefault(tool, {})[token] = mapped_uuid
            update["format_names"][mapped_uuid] = {
                "canonical": token,
                "extensions": [],
                "kind": kind,
                "sample": sample,
            }

            # Unknown formats are conservatively assumed to need conversion/transcoding.
            if kind == "video":
                update["apple_photos_compatible"]["videos"]["needs_transcode_video"].append(mapped_uuid)
            elif kind == "audio":
                update["apple_photos_compatible"]["videos"]["needs_transcode_audio"].append(mapped_uuid)
            elif kind == "image":
                update["apple_photos_compatible"]["images"]["needs_conversion"].append(mapped_uuid)

        run_ts = timestamp()
        out_path = output_dir / f"format_registry_updates_{run_ts}.json"
        try:
            with out_path.open("w", encoding="utf-8") as handle:
                json.dump(update, handle, indent=2, sort_keys=True)
            LOG.info("Wrote %d missing mapping(s) to %s", len(self._entries), out_path)
            return out_path
        except Exception as exc:  # noqa: BLE001
            LOG.error("Failed to write format registry updates: %s", exc)
            return None


# Global collector shared across the run
UNKNOWN_MAPPINGS = UnknownMappingCollector()
643
+
644
+
645
@dataclass
class RunStatistics:
    """Tracks comprehensive statistics for a Smart Media Manager run."""

    # Scanning counters
    total_files_scanned: int = 0
    total_binary_files: int = 0
    total_text_files: int = 0
    # Detection counters
    total_media_detected: int = 0
    media_compatible: int = 0
    media_incompatible: int = 0
    incompatible_with_conversion_rule: int = 0
    # Conversion counters
    conversion_attempted: int = 0
    conversion_succeeded: int = 0
    conversion_failed: int = 0
    # Import counters
    imported_after_conversion: int = 0
    imported_without_conversion: int = 0
    total_imported: int = 0
    refused_by_apple_photos: int = 0
    refused_filenames: list[tuple[Path, str]] = field(default_factory=list)  # (path, refusal reason) pairs
    # Skip counters, broken down by cause
    skipped_errors: int = 0
    skipped_unknown_format: int = 0
    skipped_corrupt_or_empty: int = 0
    skipped_non_media: int = 0
    skipped_other: int = 0
    # Staging-folder sanity counters
    staging_total: int = 0
    staging_expected: int = 0

    def print_summary(self) -> None:
        """Print a colored, formatted summary of the run statistics."""
        # ANSI color codes
        BOLD = "\033[1m"
        GREEN = "\033[92m"
        YELLOW = "\033[93m"
        RED = "\033[91m"
        BLUE = "\033[94m"
        CYAN = "\033[96m"
        RESET = "\033[0m"

        print(f"\n{BOLD}{'=' * 80}{RESET}")
        print(f"{BOLD}{CYAN}Smart Media Manager - Run Summary{RESET}")
        print(f"{BOLD}{'=' * 80}{RESET}\n")

        # Scanning section
        print(f"{BOLD}{BLUE}Scanning:{RESET}")
        print(f" Total files scanned: {self.total_files_scanned:>6}")
        print(f" Binary files: {self.total_binary_files:>6}")
        print(f" Text files: {self.total_text_files:>6}\n")

        # Detection section
        print(f"{BOLD}{BLUE}Media Detection:{RESET}")
        print(f" Media files detected: {self.total_media_detected:>6}")
        print(f" Compatible (no conversion): {GREEN}{self.media_compatible:>6}{RESET}")
        print(f" Incompatible: {YELLOW}{self.media_incompatible:>6}{RESET}")
        print(f" └─ With conversion rule: {self.incompatible_with_conversion_rule:>6}\n")

        # Conversion section (only shown when any conversion was attempted)
        if self.conversion_attempted > 0:
            print(f"{BOLD}{BLUE}Conversion:{RESET}")
            print(f" Attempted: {self.conversion_attempted:>6}")
            print(f" Succeeded: {GREEN}{self.conversion_succeeded:>6}{RESET}")
            print(f" Failed: {RED}{self.conversion_failed:>6}{RESET}\n")

        # Import section
        print(f"{BOLD}{BLUE}Apple Photos Import:{RESET}")
        print(f" Imported (after conversion):{GREEN}{self.imported_after_conversion:>6}{RESET}")
        print(f" Imported (direct): {GREEN}{self.imported_without_conversion:>6}{RESET}")
        print(f" Total imported: {BOLD}{GREEN}{self.total_imported:>6}{RESET}")
        print(f" Refused by Apple Photos: {RED}{self.refused_by_apple_photos:>6}{RESET}")

        # Success rate colored by threshold: >=95% green, >=80% yellow, else red
        if self.total_imported + self.refused_by_apple_photos > 0:
            success_rate = (self.total_imported / (self.total_imported + self.refused_by_apple_photos)) * 100
            color = GREEN if success_rate >= 95 else YELLOW if success_rate >= 80 else RED
            print(f" Success rate: {color}{success_rate:>5.1f}%{RESET}\n")
        else:
            print()

        # Skipped section (only shown when something was skipped)
        total_skipped = self.skipped_errors + self.skipped_unknown_format + self.skipped_corrupt_or_empty + self.skipped_non_media + self.skipped_other
        if total_skipped > 0:
            print(f"{BOLD}{BLUE}Skipped Files:{RESET}")
            print(f" Due to errors: {self.skipped_errors:>6}")
            print(f" Unknown format: {self.skipped_unknown_format:>6}")
            print(f" Corrupt or empty: {self.skipped_corrupt_or_empty:>6}")
            if self.skipped_non_media:
                print(f" Non-media files: {self.skipped_non_media:>6}")
            print(f" Other reasons: {self.skipped_other:>6}")
            print(f" Total skipped: {YELLOW}{total_skipped:>6}{RESET}\n")

        print(f" Total Files In The STAGING FOLDER: {self.staging_total:>6}")
        print(f" Expected Files In The STAGING FOLDER: {self.staging_expected:>6}\n")

        # Failed imports detail
        if self.refused_filenames:
            print(f"{BOLD}{RED}Files Refused by Apple Photos:{RESET}")
            for path, reason in self.refused_filenames[:10]:  # Show first 10
                print(f" • {path.name}")
                print(f" Reason: {reason}")
            if len(self.refused_filenames) > 10:
                print(f" ... and {len(self.refused_filenames) - 10} more (see log for full list)\n")
        else:
            # Blank separator when no refused-files section was printed
            print()

        print(f"{BOLD}{'=' * 80}{RESET}\n")

    def log_summary(self) -> None:
        """Log the summary to the file logger."""
        LOG.info("=" * 80)
        LOG.info("Run Summary Statistics")
        LOG.info("=" * 80)
        LOG.info(
            "Scanning: total=%d, binary=%d, text=%d",
            self.total_files_scanned,
            self.total_binary_files,
            self.total_text_files,
        )
        LOG.info(
            "Media Detection: detected=%d, compatible=%d, incompatible=%d (with_rule=%d)",
            self.total_media_detected,
            self.media_compatible,
            self.media_incompatible,
            self.incompatible_with_conversion_rule,
        )
        LOG.info(
            "Conversion: attempted=%d, succeeded=%d, failed=%d",
            self.conversion_attempted,
            self.conversion_succeeded,
            self.conversion_failed,
        )
        LOG.info(
            "Import: converted=%d, direct=%d, total=%d, refused=%d",
            self.imported_after_conversion,
            self.imported_without_conversion,
            self.total_imported,
            self.refused_by_apple_photos,
        )
        if self.total_imported + self.refused_by_apple_photos > 0:
            success_rate = (self.total_imported / (self.total_imported + self.refused_by_apple_photos)) * 100
            LOG.info("Success rate: %.1f%%", success_rate)
        LOG.info(
            "Skipped: errors=%d, unknown=%d, corrupt=%d, non_media=%d, other=%d",
            self.skipped_errors,
            self.skipped_unknown_format,
            self.skipped_corrupt_or_empty,
            self.skipped_non_media,
            self.skipped_other,
        )
        LOG.info("Staging: total=%d, expected=%d", self.staging_total, self.staging_expected)
        if self.refused_filenames:
            LOG.info("Refused files:")
            for path, reason in self.refused_filenames:
                LOG.info(" %s: %s", path, reason)
        LOG.info("=" * 80)
797
+
798
+
799
@dataclass
class FormatVote:
    """One detection tool's opinion about a file's format."""

    tool: str  # Detector name (see TOOL_PRIORITY / TOOL_WEIGHTS)
    mime: Optional[str] = None  # MIME type reported by the tool, if any
    extension: Optional[str] = None  # Extension suggested by the tool, if any
    description: Optional[str] = None  # Free-text description from the tool
    kind: Optional[str] = None  # Broad media kind, when the tool infers one
    error: Optional[str] = None  # Error message when the tool failed
807
+
808
+
809
def find_executable(*candidates: str) -> Optional[str]:
    """Return the resolved PATH location of the first available candidate.

    Candidates are probed in order with ``shutil.which``; ``None`` means
    none of them is installed.
    """
    return next(
        (resolved for resolved in map(shutil.which, candidates) if resolved),
        None,
    )
815
+
816
+
817
def resolve_imagemagick_command() -> str:
    """Locate the ImageMagick CLI, preferring the modern ``magick`` entry point
    over the legacy ``convert``; raises RuntimeError when neither is installed."""
    resolved = find_executable("magick", "convert")
    if resolved:
        return resolved
    raise RuntimeError("ImageMagick (magick/convert) not found. Please install imagemagick.")
822
+
823
+
824
def ensure_ffmpeg_path() -> str:
    """Locate ffmpeg on PATH; raises RuntimeError when it is not installed."""
    resolved = find_executable("ffmpeg")
    if resolved:
        return resolved
    raise RuntimeError("ffmpeg not found. Please install ffmpeg.")
829
+
830
+
831
def is_animated_gif(path: Path) -> bool:
    """Heuristic animation check for GIFs.

    Requires both a NETSCAPE2.0 loop-extension marker and more than one
    image-separator byte (0x2C) anywhere in the file. Unreadable files
    report False.
    """
    try:
        data = path.read_bytes()
    except OSError:
        return False
    if b"NETSCAPE2.0" not in data:
        return False
    return data.count(b"\x2c") > 1
838
+
839
+
840
def is_animated_png(path: Path) -> bool:
    """Heuristic APNG check: animated PNGs carry an ``acTL`` animation-control
    chunk. Unreadable files report False."""
    try:
        data = path.read_bytes()
    except OSError:
        return False
    return b"acTL" in data
847
+
848
+
849
def is_animated_webp(path: Path) -> bool:
    """Heuristic animated-WebP check: look for the ``ANIM`` chunk tag within
    the first 64 KiB. Unreadable files report False."""
    try:
        with path.open("rb") as handle:
            head = handle.read(65536)
    except OSError:
        return False
    return b"ANIM" in head
856
+
857
+
858
def get_psd_color_mode(path: Path) -> Optional[str]:
    """Read the color-mode name out of a PSD header.

    The mode lives in bytes 24-25 (big-endian) of the 26-byte header that
    starts with the ``8BPS`` signature. Returns ``None`` for unreadable,
    truncated, or non-PSD files, or an unknown mode code.
    """
    try:
        with path.open("rb") as handle:
            header = handle.read(26)
    except OSError:
        return None
    if len(header) < 26 or not header.startswith(b"8BPS"):
        return None
    mode_names = {
        0: "bitmap",
        1: "grayscale",
        2: "indexed",
        3: "rgb",
        4: "cmyk",
        7: "lab",
        8: "multichannel",
        9: "duotone",
    }
    return mode_names.get(int.from_bytes(header[24:26], "big"))
878
+
879
+
880
@dataclass
class Signature:
    """A (possibly partial) file-type signature reported by one detector."""

    extension: Optional[str] = None  # Suggested extension, if any
    mime: Optional[str] = None  # Suggested MIME type, if any

    def is_empty(self) -> bool:
        """True when neither field carries a usable (truthy) value."""
        return not (self.extension or self.mime)
887
+
888
+
889
def normalize_extension(ext: Optional[str]) -> Optional[str]:
    """Normalize a file extension to bare lowercase form.

    Surrounding whitespace is stripped, the result is lowercased, and at
    most one leading dot is removed. Returns ``None`` when no usable
    extension remains (``None``, blank, or a bare dot) so callers always
    get a single falsy sentinel for "absent" instead of sometimes
    receiving an empty string.
    """
    if not ext:
        return None
    normalized = ext.strip().lower()
    if normalized.startswith("."):
        normalized = normalized[1:]
    # `or None` collapses the empty-string result (e.g. for input ".") to None.
    return normalized or None
898
+
899
+
900
def looks_like_text_file(path: Path, max_bytes: int = 4096) -> bool:
    """Heuristic text sniff on the first *max_bytes* bytes.

    A file looks textual when it contains no NUL byte and more than 90% of
    the sampled bytes are printable ASCII or tab/newline/carriage-return.
    Empty files count as text; unreadable files report False.
    """
    try:
        with path.open("rb") as handle:
            sample = handle.read(max_bytes)
    except OSError:
        return False
    if not sample:
        return True
    if 0 in sample:
        return False
    printable_count = sum(32 <= byte <= 126 or byte in (9, 10, 13) for byte in sample)
    return printable_count / len(sample) > 0.9
912
+
913
+
914
def timestamp() -> str:
    """Current local time as a compact ``YYYYMMDDHHMMSS`` string."""
    return format(dt.datetime.now(), "%Y%m%d%H%M%S")
916
+
917
+
918
def tool_rank(tool: str) -> int:
    """Rank of *tool* within TOOL_PRIORITY; unknown tools sort last."""
    if tool in TOOL_PRIORITY:
        return TOOL_PRIORITY.index(tool)
    return len(TOOL_PRIORITY)
923
+
924
+
925
def vote_weight(vote: FormatVote) -> float:
    """Voting weight for the vote's tool; unlisted tools weigh 1.0."""
    try:
        return TOOL_WEIGHTS[vote.tool]
    except KeyError:
        return 1.0
927
+
928
+
929
def collect_raw_groups_from_extensions(exts: Iterable[Optional[str]]) -> set[str]:
    """Union of RAW dependency groups implied by any of the given extensions.

    Extensions that fail normalization (``ensure_dot_extension`` returns a
    falsy value) are ignored; unknown extensions contribute nothing.
    """
    matched: set[str] = set()
    for candidate in exts:
        dotted = ensure_dot_extension(candidate)
        if dotted:
            matched |= RAW_EXTENSION_TO_GROUPS.get(dotted.lower(), set())
    return matched
937
+
938
+
939
def is_raw_extension(ext: Optional[str]) -> bool:
    """True when the extension maps to a known camera-RAW dependency group."""
    dotted = ensure_dot_extension(ext)
    if not dotted:
        return False
    return dotted.lower() in RAW_EXTENSION_TO_GROUPS
942
+
943
+
944
def install_raw_dependency_groups(groups: Iterable[str]) -> None:
    """Install the Homebrew dependencies for the given RAW groups.

    Groups already handled this run (tracked in _INSTALLED_RAW_GROUPS) are
    skipped; unknown group names are silently ignored. Only system
    dependencies are installed here — never Python packages.
    """
    needed = set(groups) - _INSTALLED_RAW_GROUPS
    if not needed:
        return
    brew_path = ensure_homebrew()
    # Sorted for deterministic install order across runs.
    for group in sorted(needed):
        config = RAW_DEPENDENCY_GROUPS.get(group)
        if not config:
            continue
        # Install system dependencies (Homebrew packages and casks)
        for package in config.get("brew", []):
            ensure_brew_package(brew_path, package)
        for cask in config.get("cask", []):
            ensure_brew_cask(brew_path, cask)
        # NOTE: Python packages (rawpy) are NOT installed at runtime
        # Users must install with: uv tool install smart-media-manager[enhanced]
        # Or manually: pip install rawpy
        # RAW files will be skipped if rawpy is unavailable (detected via import)
    # Mark every requested group as handled, even those without a config entry.
    _INSTALLED_RAW_GROUPS.update(needed)
963
+
964
+
965
def refine_raw_media(path: Path, extension_candidates: Iterable[Optional[str]]) -> tuple[Optional[MediaFile], Optional[str]]:
    """Classify a camera-RAW file by opening it with rawpy.

    On success returns ``(media, None)`` where the format name is derived
    from the camera make/model metadata; otherwise returns ``(None, reason)``.
    """
    try:
        with rawpy.imread(str(path)) as raw:
            camera_make = (raw.metadata.camera_make or "").strip()
            camera_model = (raw.metadata.camera_model or "").strip()
            # Fall back to the generic label when the file carries no camera info.
            format_name = " ".join(filter(None, (camera_make, camera_model))) or "raw"
    except rawpy.LibRawFileUnsupportedError:
        return None, "non-media: rawpy unsupported raw"
    except Exception as exc:  # pragma: no cover - safeguard
        return None, f"rawpy failed: {exc}"

    # Prefer the first candidate that normalizes to a known RAW extension.
    chosen_extension = next(
        (
            dotted
            for dotted in (ensure_dot_extension(candidate) for candidate in extension_candidates)
            if dotted and dotted.lower() in RAW_EXTENSION_TO_GROUPS
        ),
        None,
    )
    if not chosen_extension:
        chosen_extension = ensure_dot_extension(path.suffix) or ".raw"

    media = MediaFile(
        source=path,
        kind="raw",
        extension=chosen_extension,
        format_name=format_name,
        compatible=True,
        original_suffix=path.suffix,
    )
    media.detected_compatible = media.compatible
    return media, None
995
+
996
+
997
def refine_image_media(media: MediaFile, skip_compatibility_check: bool = False) -> tuple[Optional[MediaFile], Optional[str]]:
    """
    FAST corruption detection for image files (<10ms for most images).

    Strategy:
    1. Format-specific quick checks (EOF markers) - microseconds
    2. PIL load() to decode pixels - catches truncation - milliseconds

    Args:
        media: MediaFile to validate
        skip_compatibility_check: If True, skip all validation (for testing)

    Returns:
        (media, None) when the image passes all checks, otherwise
        (None, reason) describing why the image was rejected.
    """
    # Skip all validation if flag is set (for format testing)
    if skip_compatibility_check:
        return media, None

    # FAST CHECK: Format-specific validation (very quick!)
    path = media.source

    # JPEG: Check SOI and EOI markers (2 reads, <1ms)
    if media.extension in (".jpg", ".jpeg"):
        try:
            with open(path, "rb") as f:
                # Check Start of Image marker (FFD8)
                soi = f.read(2)
                if soi != b"\xff\xd8":
                    return None, "invalid JPEG: missing SOI marker (FFD8)"

                # Check End of Image marker (FFD9)
                if path.stat().st_size >= 4:  # Must have at least SOI + EOI
                    f.seek(-2, 2)  # seek to the last two bytes of the file
                    eoi = f.read()
                    if eoi != b"\xff\xd9":
                        return None, "truncated JPEG: missing EOI marker (FFD9)"
        except OSError as e:
            return None, f"cannot read JPEG markers: {e}"

    # PNG: Check signature and IEND chunk (2 reads, <1ms)
    elif media.extension == ".png":
        try:
            with open(path, "rb") as f:
                # Check PNG signature
                sig = f.read(8)
                if sig != b"\x89PNG\r\n\x1a\n":
                    return None, "invalid PNG: missing signature"

                # Check for IEND chunk at end (last 12 bytes: length + "IEND" + CRC)
                file_size = path.stat().st_size
                if file_size >= 20:  # Minimum valid PNG size
                    f.seek(-12, 2)
                    chunk_data = f.read(12)
                    if b"IEND" not in chunk_data:
                        return None, "truncated PNG: missing IEND chunk"
        except OSError as e:
            return None, f"cannot read PNG chunks: {e}"

    # SPECIAL CHECK: PSD color mode validation
    # Apple Photos only supports RGB PSD, not CMYK or other modes.
    # psd_color_mode is expected to have been populated earlier in the
    # pipeline; missing values default to "unknown" and pass through.
    if media.extension == ".psd":
        psd_color_mode = media.metadata.get("psd_color_mode", "unknown")
        if psd_color_mode == "cmyk":
            return (
                None,
                "CMYK PSD not supported by Photos (requires RGB TIFF conversion)",
            )
        elif psd_color_mode in ("lab", "multichannel", "duotone"):
            return (
                None,
                f"{psd_color_mode.upper()} PSD not supported by Photos (requires RGB TIFF conversion)",
            )

    # COMPREHENSIVE CHECK: Actually decode the image (catches all corruption)
    # This is still fast (<10ms for most images) but thorough
    try:
        # First pass: verify headers
        with Image.open(path) as img:
            img.verify()

        # CRITICAL: Second pass - actually decode pixel data
        # Must reopen because verify() invalidates the image!
        with Image.open(path) as img:
            img.load()  # Force full decode - catches truncation

            # Sanity check dimensions
            width, height = img.size
            if width <= 0 or height <= 0:
                return None, "invalid image dimensions"

    except Image.DecompressionBombError as e:
        # Checked before the generic handlers so the user gets an actionable
        # message about raising/disabling the Pillow pixel limit.
        max_pixels = Image.MAX_IMAGE_PIXELS
        if max_pixels:
            return (
                None,
                f"image exceeds Pillow pixel limit ({max_pixels} pixels): {e}. Set --max-image-pixels none or SMART_MEDIA_MANAGER_MAX_IMAGE_PIXELS=none to disable.",
            )
        return (
            None,
            f"image exceeds Pillow pixel limit: {e}. Set --max-image-pixels none or SMART_MEDIA_MANAGER_MAX_IMAGE_PIXELS=none to disable.",
        )
    except (OSError, SyntaxError, ValueError) as e:
        error_msg = str(e).lower()

        # Classify error type for clear messaging
        if "truncated" in error_msg:
            return None, f"truncated or corrupt image data: {e}"
        elif "cannot identify" in error_msg:
            return None, f"invalid image format: {e}"
        else:
            return None, f"image corruption detected: {e}"

    return media, None
1108
+
1109
+
1110
def refine_video_media(media: MediaFile, skip_compatibility_check: bool = False) -> tuple[Optional[MediaFile], Optional[str]]:
    """
    Validate video file compatibility with Apple Photos.

    Checks:
    - Video codec and codec tag (Dolby Vision, avc3/hev1, 10-bit)
    - Audio codec compatibility (FLAC, Opus, DTS, etc.)
    - Audio sample rate (must be standard rate)
    - Audio channel configuration

    Args:
        media: MediaFile to validate
        skip_compatibility_check: If True, skip all validation (for testing)

    Returns:
        (media, None) when compatible (or when validation is unavailable),
        otherwise (None, reason) describing the incompatibility.
    """
    # Skip all validation if flag is set (for format testing)
    if skip_compatibility_check:
        return media, None

    # Without ffprobe we cannot validate; treat as compatible rather than block.
    ffprobe_path = shutil.which("ffprobe")
    if not ffprobe_path:
        return media, None

    # Get BOTH video and audio stream info
    # Note: Don't fail if audio stream missing, just get what's available
    cmd = [
        ffprobe_path,
        "-v",
        "error",
        "-show_entries",
        "stream=codec_type,codec_name,codec_tag_string,width,height,duration,pix_fmt,profile,sample_rate,channels,channel_layout",
        "-of",
        "default=noprint_wrappers=1",
        str(media.source),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        return None, "video validation failed"

    output = result.stdout.strip()
    output_lower = output.lower()
    # Keep raw ffprobe output for later stages / debugging.
    media.metadata["ffprobe_info"] = output

    # === VIDEO STREAM VALIDATION ===

    # CRITICAL: Check for incompatible codec tags
    # Apple requires parameter sets in container (stsd), not in-stream
    # Look for codec_tag_string field specifically to avoid false positives
    # NOTE(review): only the FIRST codec_tag_string line is inspected; for
    # multi-stream files this could be a non-video stream's tag — confirm
    # against ffprobe stream ordering.
    codec_tag_string = ""
    for line in output.split("\n"):
        if "codec_tag_string=" in line.lower():
            codec_tag_string = line.split("=")[1].strip().lower()
            break

    incompatible_tags = {
        "avc3": "H.264 with in-stream parameters (avc3) not compatible; requires avc1",
        "hev1": "HEVC with in-stream parameters (hev1) not compatible; requires hvc1",
        "dvhe": "Dolby Vision with in-stream parameters (dvhe) not compatible; requires dvh1",
    }

    for tag, error_msg in incompatible_tags.items():
        if tag in codec_tag_string:
            return None, error_msg

    # Check for Dolby Vision (even dvh1 may have import issues)
    # Only check codec tag, not entire output (to avoid false positives)
    if any(tag in codec_tag_string for tag in ["dvh1", "dvav", "dva1"]):
        return (
            None,
            "Dolby Vision HEVC not compatible with Photos (requires standard HEVC transcode)",
        )

    # Also check for "dolby" in entire output as a backup check
    if "dolby" in output_lower and "vision" in output_lower:
        return (
            None,
            "Dolby Vision HEVC not compatible with Photos (requires standard HEVC transcode)",
        )

    # Check for 10-bit color depth (pix_fmt values like yuv420p10le/10be)
    if "10le" in output_lower or "10be" in output_lower:
        return (
            None,
            "10-bit color depth not fully compatible with Photos (requires 8-bit transcode)",
        )

    # === AUDIO STREAM VALIDATION ===

    audio_codec_value = (media.audio_codec or "").lower()
    if audio_codec_value:
        unsupported_audio = {
            "flac": "FLAC audio not supported by Photos (requires AAC transcode)",
            "opus": "Opus audio not supported by Photos (requires AAC transcode)",
            "dts": "DTS audio not supported by Photos (requires AC-3/EAC-3 transcode)",
            "dts-hd": "DTS-HD audio not supported by Photos (requires AC-3/EAC-3 transcode)",
            "truehd": "Dolby TrueHD audio not supported by Photos (requires AC-3/EAC-3 transcode)",
            "vorbis": "Vorbis audio not supported by Photos (requires AAC transcode)",
        }

        for unsupported_codec, error_msg in unsupported_audio.items():
            if unsupported_codec in audio_codec_value:
                return None, error_msg

    # Fall back to parsing ffprobe output when the MediaFile has no sample rate.
    # Track the current stream's codec_type so we only read the AUDIO stream's
    # sample_rate line (video streams also emit sample_rate=N/A style lines).
    sample_rate = media.audio_sample_rate
    if sample_rate is None:
        current_stream_type = None
        for line in output.split("\n"):
            lower = line.lower()
            if lower.startswith("codec_type="):
                current_stream_type = lower.split("=", 1)[1].strip()
            elif current_stream_type == "audio" and lower.startswith("sample_rate="):
                try:
                    sample_rate = int(lower.split("=", 1)[1].strip())
                except (ValueError, IndexError):
                    sample_rate = None
                break

    if sample_rate:
        # Standard PCM/AAC sample rates accepted by Photos.
        standard_rates = {
            8000,
            11025,
            12000,
            16000,
            22050,
            24000,
            32000,
            44100,
            48000,
            88200,
            96000,
            176400,
            192000,
        }

        if sample_rate not in standard_rates:
            return None, f"Unsupported audio sample rate {sample_rate} Hz (requires resampling to 48000 Hz)"

    return media, None
1247
+
1248
+
1249
def run_command_with_progress(command: list[str], message: str, env: Optional[dict[str, str]] = None) -> None:
    """Run *command*, drawing an indeterminate progress bar on stdout.

    Combined stdout+stderr is captured to a temp log file; on failure the
    last 4000 characters are logged and a RuntimeError is raised.

    Args:
        command: Argument vector passed to subprocess.Popen (no shell).
        message: Label displayed next to the progress bar.
        env: Optional environment mapping; defaults to a copy of os.environ.

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    bar_length = 28
    start = time.time()
    fd, tmp_name = tempfile.mkstemp(prefix="smm_cmd_", suffix=".log")
    os.close(fd)  # re-opened below via a Python file object
    capture_path = Path(tmp_name)
    try:
        with capture_path.open("w", encoding="utf-8") as capture_writer:
            with subprocess.Popen(
                command,
                stdout=capture_writer,
                stderr=subprocess.STDOUT,
                text=True,
                env=env or os.environ.copy(),
            ) as proc:
                while True:
                    ret = proc.poll()
                    elapsed = time.time() - start
                    # Repeating sweep animation. Clamp to 1.0: the raw
                    # fraction (elapsed % bar_length) / (bar_length - 1)
                    # can slightly exceed 1, which previously produced a
                    # bar of bar_length + 1 '#' characters.
                    progress = min((elapsed % bar_length) / (bar_length - 1), 1.0)
                    filled = int(progress * bar_length)
                    bar = "#" * filled + "-" * (bar_length - filled)
                    sys.stdout.write(f"\r{message} [{bar}]")
                    sys.stdout.flush()
                    if ret is not None:
                        break
                    time.sleep(0.2)
        # Erase the progress line now that the command has finished.
        sys.stdout.write("\r" + " " * (len(message) + bar_length + 3) + "\r")
        sys.stdout.flush()
        if proc.returncode != 0:
            # Capture file is closed (flushed) at this point, so the tail
            # read sees the complete output.
            output_tail = ""
            try:
                with capture_path.open("r", encoding="utf-8") as capture_reader:
                    data = capture_reader.read()
                output_tail = data[-4000:].strip()
            except Exception:
                output_tail = "(failed to read command output)"
            error_message = f"Command '{command[0]}' failed with exit code {proc.returncode}."
            if output_tail:
                LOG.error("%s Output:\n%s", error_message, output_tail)
            raise RuntimeError(error_message)
    finally:
        # Best-effort cleanup of the temp log file.
        with suppress(OSError):
            capture_path.unlink()
1292
+
1293
+
1294
def ensure_homebrew() -> str:
    """Locate the Homebrew binary, installing Homebrew if necessary.

    The discovered path is memoized in the module-level _BREW_PATH_CACHE.

    Raises:
        RuntimeError: If installation finishes but brew still cannot be found.
    """
    global _BREW_PATH_CACHE
    cached = _BREW_PATH_CACHE
    if cached and Path(cached).exists():
        return cached
    located = shutil.which("brew")
    if located:
        _BREW_PATH_CACHE = located
        return located
    # Not on PATH: run the official installer non-interactively.
    run_command_with_progress(
        [
            "/bin/bash",
            "-lc",
            'NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"',
        ],
        "Installing Homebrew",
    )
    # Apple Silicon and Intel default install locations, in that order.
    for candidate in ("/opt/homebrew/bin/brew", "/usr/local/bin/brew"):
        candidate_path = Path(candidate)
        if candidate_path.exists():
            # Make brew visible to every subsequent subprocess call.
            os.environ["PATH"] = f"{candidate_path.parent}:{os.environ.get('PATH', '')}"
            _BREW_PATH_CACHE = str(candidate_path)
            return _BREW_PATH_CACHE
    located = shutil.which("brew")
    if not located:
        raise RuntimeError("Homebrew installation succeeded but brew binary not found in PATH.")
    _BREW_PATH_CACHE = located
    return located
1319
+
1320
+
1321
def brew_package_installed(brew_path: str, package: str) -> bool:
    """Return True when `brew list <package>` exits successfully."""
    completed = subprocess.run(
        [brew_path, "list", package],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return completed.returncode == 0
1325
+
1326
+
1327
def ensure_brew_package(brew_path: str, package: str) -> None:
    """Install *package* via Homebrew unless it is already present.

    Raises:
        RuntimeError: When the install fails (with manual-install guidance).
    """
    if brew_package_installed(brew_path, package):
        # Deliberately no `brew upgrade` here: avoids repeated downloads.
        LOG.debug("Package %s already installed; skipping upgrade to avoid repeated downloads.", package)
        return
    try:
        run_command_with_progress([brew_path, "install", "--quiet", package], f"Installing {package}")
    except RuntimeError as exc:  # pragma: no cover - depends on user env
        raise RuntimeError(f"Failed to install {package} via Homebrew. Install it manually (brew install {package}) or rerun with --skip-bootstrap.") from exc
1335
+
1336
+
1337
def brew_cask_installed(brew_path: str, cask: str) -> bool:
    """Return True when `brew list --cask <cask>` exits successfully."""
    completed = subprocess.run(
        [brew_path, "list", "--cask", cask],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return completed.returncode == 0
1341
+
1342
+
1343
def ensure_brew_cask(brew_path: str, cask: str) -> None:
    """Install *cask* via Homebrew unless it is already present.

    Raises:
        RuntimeError: When the install fails (with manual-install guidance).
    """
    if brew_cask_installed(brew_path, cask):
        # Deliberately no upgrade here: avoids repeated downloads.
        LOG.debug("Cask %s already installed; skipping upgrade to avoid repeated downloads.", cask)
        return
    try:
        run_command_with_progress([brew_path, "install", "--cask", "--quiet", cask], f"Installing {cask}")
    except RuntimeError as exc:  # pragma: no cover
        raise RuntimeError(f"Failed to install {cask} via Homebrew. Install it manually (brew install --cask {cask}) or rerun with --skip-bootstrap.") from exc
1351
+
1352
+
1353
def pip_package_installed(package: str) -> bool:
    """Return True when *package* is installed per `pip show`, caching hits.

    Positive results are remembered in _PIP_PACKAGE_CACHE so repeated calls
    skip the subprocess; negative results are re-checked every time.
    """
    if package in _PIP_PACKAGE_CACHE:
        return True
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "show", package],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if completed.returncode != 0:
        return False
    _PIP_PACKAGE_CACHE.add(package)
    return True
1362
+
1363
+
1364
def ensure_pip_package(package: str) -> None:
    """Install or upgrade *package* with pip; log (never raise) on failure.

    Both branches run `pip install --upgrade`; only the user-facing progress
    label differs, and the cache is updated after a fresh install.
    """
    try:
        already_installed = pip_package_installed(package)
        label = f"Updating {package}" if already_installed else f"Installing {package}"
        run_command_with_progress(
            [sys.executable, "-m", "pip", "install", "--upgrade", package],
            label,
        )
        if not already_installed:
            _PIP_PACKAGE_CACHE.add(package)
    except RuntimeError as exc:
        # Pip can fail for packages with C extensions (e.g. rawpy compilation).
        # Warn and continue - files requiring this package will be skipped.
        LOG.warning(
            "Failed to install Python package '%s': %s. Files requiring this package will be skipped. Try installing manually with 'pip install %s' or use --skip-bootstrap to bypass.",
            package,
            exc,
            package,
        )
1386
+
1387
+
1388
def ensure_system_dependencies() -> None:
    """Bootstrap Homebrew and install every required brew formula."""
    brew = ensure_homebrew()
    for formula in REQUIRED_BREW_PACKAGES.values():
        ensure_brew_package(brew, formula)
1392
+
1393
+
1394
def copy_metadata_from_source(source: Path, target: Path) -> None:
    """Copy all metadata from *source* to *target* using exiftool.

    ExifTool's -TagsFromFile machinery translates fields across EXIF, IPTC
    and XMP standards (e.g. EXIF:DateTimeOriginal -> XMP:CreateDate,
    IPTC:Caption -> XMP:Description) while preserving GPS, copyright and
    camera information - a metadata translation layer analogous to the
    project's UUID system for format names.

    Best effort: missing exiftool/paths or a failed copy is silently
    tolerated (logged at debug level only).
    """
    exiftool = find_executable("exiftool")
    if not exiftool:
        return
    if not source.exists() or not target.exists():
        return
    command = [
        exiftool,
        "-overwrite_original",
        "-TagsFromFile",
        str(source),
        "-all:all",  # Copy all writable tags preserving group structure
        "-unsafe",  # Include normally unsafe tags (needed for some JPEG repairs)
        str(target),
    ]
    try:
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except Exception as exc:
        LOG.debug("Exiftool metadata copy failed for %s -> %s: %s", source, target, exc)
    else:
        LOG.debug("Metadata copied from %s to %s via exiftool", source.name, target.name)
1422
+
1423
+
1424
def ensure_raw_dependencies_for_files(media_files: Iterable[MediaFile]) -> None:
    """Install the RAW-decoding dependency groups needed by *media_files*.

    Does nothing when no file's extension (current or original) maps to a
    RAW dependency group.
    """
    groups: set[str] = set()
    for item in media_files:
        groups.update(collect_raw_groups_from_extensions([item.extension, item.original_suffix]))
    if groups:
        install_raw_dependency_groups(groups)
1431
+
1432
+
1433
def normalize_mime_value(mime: Optional[str]) -> Optional[str]:
    """Lowercase and trim a MIME string; None for missing or blank input."""
    if mime:
        cleaned = mime.strip().lower()
        if cleaned:
            return cleaned
    return None
1438
+
1439
+
1440
def is_textual_mime(mime: Optional[str]) -> bool:
    """True for text/* MIME types or any type listed in TEXTUAL_MIME_HINTS."""
    normalized = normalize_mime_value(mime)
    if not normalized:
        return False
    return normalized.startswith("text/") or normalized in TEXTUAL_MIME_HINTS
1447
+
1448
+
1449
def ensure_dot_extension(ext: Optional[str]) -> Optional[str]:
    """Return *ext* lowercased with a leading dot, or None for blank input."""
    if not ext:
        return None
    cleaned = ext.strip().lower()
    if not cleaned:
        return None
    return cleaned if cleaned.startswith(".") else f".{cleaned}"
1458
+
1459
+
1460
def canonicalize_extension(ext: Optional[str]) -> Optional[str]:
    """
    Map media extension variants onto a single preferred spelling.

    Examples:
        .jfif, .jpeg -> .jpg
        .tif -> .tiff

    Guarantees consistent extension naming regardless of which detection
    tool produced the value. Only media (image/video/RAW) extensions need
    mapping; non-media files are filtered out before this is called, and
    unknown extensions pass through after dot/lowercase normalization.
    """
    if not ext:
        return None

    normalized = ensure_dot_extension(ext)
    if not normalized:
        return None

    # variant -> canonical spelling; extend as detection tools require.
    variants = {
        ".jfif": ".jpg",
        ".jpeg": ".jpg",
        ".jpe": ".jpg",
        ".tif": ".tiff",
    }
    return variants.get(normalized, normalized)
1493
+
1494
+
1495
def kind_from_mime(mime: Optional[str]) -> Optional[str]:
    """Map a MIME type to a coarse media kind: image, video or audio."""
    normalized = normalize_mime_value(mime)
    if not normalized:
        return None
    for prefix, kind in (("image/", "image"), ("video/", "video"), ("audio/", "audio")):
        if normalized.startswith(prefix):
            return kind
    return None
1506
+
1507
+
1508
def kind_from_extension(ext: Optional[str]) -> Optional[str]:
    """Classify an extension as raw / image / video, or None when unknown."""
    norm = normalize_extension(ext)
    if not norm:
        return None
    dotted = ensure_dot_extension(norm)
    # RAW is checked first so RAW formats are never reported as plain images.
    if dotted and dotted.lower() in RAW_EXTENSION_TO_GROUPS:
        return "raw"
    if dotted in COMPATIBLE_IMAGE_EXTENSIONS or norm in ALL_IMAGE_EXTENSIONS:
        return "image"
    if dotted in VIDEO_EXTENSION_MAP.values():
        return "video"
    return None
1520
+
1521
+
1522
def kind_from_description(description: Optional[str]) -> Optional[str]:
    """Guess a coarse media kind from a detector's textual description."""
    if not description:
        return None
    text = description.lower()
    # "disk image" (DMG/ISO) must not be mistaken for a picture.
    image_words = ("image", "jpeg", "jpg", "png", "photo", "bitmap")
    if "disk image" not in text and any(word in text for word in image_words):
        return "image"
    video_words = ("video", "movie", "mpeg", "quicktime", "mp4", "h264", "h.264")
    if any(word in text for word in video_words):
        return "video"
    audio_words = ("audio", "sound", "mp3", "aac", "alac")
    if any(word in text for word in audio_words):
        return "audio"
    raw_words = (
        "raw",
        "cr2",
        "cr3",
        "nef",
        "arw",
        "raf",
        "orf",
        "rw2",
        "dng",
        "iiq",
        "3fr",
        "x3f",
    )
    if any(word in text for word in raw_words):
        return "raw"
    return None
1551
+
1552
+
1553
def extension_from_mime(mime: Optional[str]) -> Optional[str]:
    """Resolve a MIME type to a dotted extension.

    Lookup order: project image map, project video map, then the stdlib
    mimetypes fallback. Result is dot/lowercase normalized.
    """
    normalized = normalize_mime_value(mime)
    if not normalized:
        return None
    ext = (
        IMAGE_MIME_EXTENSION_MAP.get(normalized)
        or VIDEO_MIME_EXTENSION_MAP.get(normalized)
        or mimetypes.guess_extension(normalized)
    )
    return ensure_dot_extension(ext)
1563
+
1564
+
1565
def extension_from_description(description: Optional[str]) -> Optional[str]:
    """Guess a dotted file extension from a detector's textual description.

    Entries are checked in order, so earlier formats win when a description
    mentions several keywords.
    """
    if not description:
        return None
    text = description.lower()
    keyword_table = (
        (".jpg", ("jpeg", "jpg")),
        (".png", ("png",)),
        (".gif", ("gif",)),
        (".bmp", ("bitmap", "bmp")),
        (".tiff", ("tiff", "tif")),
        (".heic", ("heic", "heif")),
        (".mp4", ("mp4", "mpeg-4", "h.264", "h264")),
        (".mov", ("quicktime", "mov")),
        (".m4v", ("m4v",)),
        (".webm", ("webm",)),
        (".avi", ("avi",)),
        (".mkv", ("matroska", "mkv")),
    )
    for extension, keywords in keyword_table:
        for keyword in keywords:
            if keyword in text:
                return extension
    return None
1587
+
1588
+
1589
def is_supported_video_codec(codec: Optional[str]) -> bool:
    """True when *codec* (case-insensitive) is Photos-compatible."""
    return bool(codec) and codec.lower() in COMPATIBLE_VIDEO_CODECS
1594
+
1595
+
1596
def choose_vote_by_priority(
    votes: Iterable[FormatVote],
    predicate: Callable[[FormatVote], bool],
) -> Optional[FormatVote]:
    """Return the first vote satisfying *predicate*, in TOOL_PRIORITY order.

    Args:
        votes: Candidate votes. Any iterable is accepted; it is materialized
            once internally because the scan iterates it once per tool, and
            a one-shot iterator (e.g. a generator) would otherwise be
            silently exhausted after the first priority pass.
        predicate: Filter applied to each vote.

    Returns:
        The matching vote from the highest-priority tool, or None.
    """
    candidates = list(votes)  # snapshot: safe to iterate per tool below
    for tool in TOOL_PRIORITY:
        for vote in candidates:
            if vote.tool == tool and predicate(vote):
                return vote
    return None
1605
+
1606
+
1607
def select_consensus_vote(votes: list[FormatVote]) -> Optional[FormatVote]:
    """Pick the single vote that best represents detector consensus.

    Resolution order:
    1. Weighted MIME vote: sum vote_weight() per normalized MIME type;
       among the top-weighted MIME(s), return the vote from the
       highest-priority tool.
    2. Otherwise the same weighted vote over normalized extensions.
    3. Otherwise the single heaviest vote, ties broken by tool rank.

    Returns None when no vote succeeded with usable information.
    """
    # Only votes without errors and with at least one identifying field count.
    valid_votes = [vote for vote in votes if not vote.error and (vote.mime or vote.extension or vote.description)]
    if not valid_votes:
        return None

    mime_weights: dict[str, float] = {}
    for vote in valid_votes:
        mime_val = normalize_mime_value(vote.mime)
        if mime_val:
            mime_weights[mime_val] = mime_weights.get(mime_val, 0.0) + vote_weight(vote)
    if mime_weights:
        top_weight = max(mime_weights.values())
        # isclose() guards against float summation noise when finding ties.
        top_mimes = {mime for mime, weight in mime_weights.items() if math.isclose(weight, top_weight, rel_tol=1e-9, abs_tol=1e-9)}
        choice = choose_vote_by_priority(valid_votes, lambda v: normalize_mime_value(v.mime) in top_mimes)
        if choice:
            return choice

    ext_weights: dict[str, float] = {}
    for vote in valid_votes:
        ext_val = ensure_dot_extension(vote.extension)
        if ext_val:
            ext_weights[ext_val] = ext_weights.get(ext_val, 0.0) + vote_weight(vote)
    if ext_weights:
        top_weight = max(ext_weights.values())
        top_exts = {ext for ext, weight in ext_weights.items() if math.isclose(weight, top_weight, rel_tol=1e-9, abs_tol=1e-9)}
        choice = choose_vote_by_priority(valid_votes, lambda v: ensure_dot_extension(v.extension) in top_exts)
        if choice:
            return choice

    # Fallback: heaviest individual vote; lower tool_rank wins weight ties.
    return max(
        valid_votes,
        key=lambda v: (vote_weight(v), -tool_rank(v.tool)),
        default=None,
    )
1641
+
1642
+
1643
def determine_media_kind(votes: list[FormatVote], consensus: Optional[FormatVote]) -> Optional[str]:
    """Decide the overall media kind (image/video/audio/raw) from all votes.

    Every non-error vote contributes vote_weight() to the kind it implies
    (its explicit kind, else a kind inferred from MIME, extension, then
    description). Among the top-weighted kinds: the consensus vote's kind
    wins if it agrees; otherwise the highest-priority tool's matching vote
    decides. When no vote implies any kind, falls back to the consensus
    vote's kind, or None.
    """
    kind_weights: dict[str, float] = {}
    candidate_votes: list[FormatVote] = []
    for vote in votes:
        if vote.error:
            continue
        inferred = vote.kind or kind_from_mime(vote.mime) or kind_from_extension(vote.extension) or kind_from_description(vote.description)
        if inferred:
            weight = vote_weight(vote)
            kind_weights[inferred] = kind_weights.get(inferred, 0.0) + weight
            candidate_votes.append(vote)

    if kind_weights:
        top_weight = max(kind_weights.values())
        # isclose() tolerates float noise when several kinds tie at the top.
        top_kinds = {kind for kind, weight in kind_weights.items() if math.isclose(weight, top_weight, rel_tol=1e-9, abs_tol=1e-9)}
        if consensus:
            consensus_kind = consensus.kind or kind_from_mime(consensus.mime) or kind_from_extension(consensus.extension) or kind_from_description(consensus.description)
            if consensus_kind and consensus_kind in top_kinds:
                return consensus_kind
        choice = choose_vote_by_priority(
            candidate_votes,
            lambda v: (v.kind or kind_from_mime(v.mime) or kind_from_extension(v.extension) or kind_from_description(v.description)) in top_kinds,
        )
        if choice:
            return choice.kind or kind_from_mime(choice.mime) or kind_from_extension(choice.extension) or kind_from_description(choice.description)

    if consensus:
        return consensus.kind or kind_from_mime(consensus.mime) or kind_from_extension(consensus.extension) or kind_from_description(consensus.description)
    return None
1672
+
1673
+
1674
def votes_error_summary(votes: list[FormatVote]) -> str:
    """Join per-tool error messages, or a generic note when none errored."""
    joined = "; ".join(f"{vote.tool}: {vote.error}" for vote in votes if vote.error)
    return joined or "detectors could not agree on a media format"
1679
+
1680
+
1681
def collect_format_votes(path: Path, puremagic_signature: Optional[Signature] = None) -> list[FormatVote]:
    """Run every format detector against *path*, one FormatVote per tool."""
    votes: list[FormatVote] = []
    votes.append(classify_with_libmagic(path))
    # The puremagic signature may already have been computed by the caller.
    votes.append(classify_with_puremagic(path, puremagic_signature))
    votes.append(classify_with_pyfsig(path))
    votes.append(classify_with_binwalk(path))
    return votes
1688
+
1689
+
1690
def classify_with_libmagic(path: Path) -> FormatVote:
    """Classify *path* with libmagic (python-magic) and return a FormatVote.

    The two magic.Magic instances (MIME mode and description mode) are
    created lazily on first use and cached in module globals, so repeated
    classifications reuse them.
    """
    if magic is None:
        # python-magic needs the libmagic system library, installed during
        # bootstrap; until then this tool cannot vote.
        return FormatVote(tool="libmagic", error="libmagic not yet installed")
    global _MAGIC_MIME, _MAGIC_DESC
    try:
        if _MAGIC_MIME is None:
            _MAGIC_MIME = magic.Magic(mime=True)
        if _MAGIC_DESC is None:
            _MAGIC_DESC = magic.Magic()
        raw_mime = _MAGIC_MIME.from_file(str(path)) if _MAGIC_MIME else None
        mime = normalize_mime_value(raw_mime)
        description = _MAGIC_DESC.from_file(str(path)) if _MAGIC_DESC else None
        # Prefer MIME-derived extension/kind; fall back to the description.
        extension = extension_from_mime(mime) or extension_from_description(description)
        kind = kind_from_mime(mime) or kind_from_description(description)
        if not mime and not description:
            return FormatVote(tool="libmagic", error="no match")
        return FormatVote(
            tool="libmagic",
            mime=mime,
            description=description,
            extension=extension,
            kind=kind,
        )
    except Exception as exc:  # pragma: no cover - runtime safety
        return FormatVote(tool="libmagic", error=str(exc))
1715
+
1716
+
1717
def classify_with_puremagic(path: Path, signature: Optional[Signature] = None) -> FormatVote:
    """Build a FormatVote from a puremagic signature (computed when absent)."""
    sig = signature if signature is not None else safe_puremagic_guess(path)
    if sig.is_empty():
        return FormatVote(tool="puremagic", error="no match")
    extension = None
    if sig.extension:
        # Prefer the canonical image/video spelling before plain normalization.
        extension = (
            canonical_image_extension(sig.extension)
            or canonical_video_extension(sig.extension)
            or ensure_dot_extension(sig.extension)
        )
    mime = normalize_mime_value(sig.mime)
    return FormatVote(
        tool="puremagic",
        mime=mime,
        extension=extension,
        description=sig.mime if sig.mime else None,
        kind=kind_from_mime(mime) or kind_from_extension(extension),
    )
1739
+
1740
+
1741
def classify_with_pyfsig(path: Path) -> FormatVote:
    """Classify *path* against pyfsig's file-signature database."""
    try:
        matches = pyfsig_interface.find_matches_for_file_path(str(path))
    except Exception as exc:  # pragma: no cover - runtime safety
        return FormatVote(tool="pyfsig", error=str(exc))
    if not matches:
        return FormatVote(tool="pyfsig", error="no signature match")
    # Only the first (best) match is used.
    top = matches[0]
    ext = ensure_dot_extension(top.file_extension)
    desc = top.description
    return FormatVote(
        tool="pyfsig",
        extension=ext,
        description=desc,
        kind=kind_from_extension(ext) or kind_from_description(desc),
    )
1758
+
1759
+
1760
def classify_with_binwalk(path: Path) -> FormatVote:
    """Classify *path* by scanning it with the binwalk CLI, when available."""
    if not BINWALK_EXECUTABLE:
        return FormatVote(tool="binwalk", error="binwalk executable not found")
    try:
        completed = subprocess.run(
            [BINWALK_EXECUTABLE, "--signature", "--length", "0", str(path)],
            capture_output=True,
            text=True,
            check=False,
        )
    except Exception as exc:  # pragma: no cover - runtime safety
        return FormatVote(tool="binwalk", error=str(exc))
    # binwalk exits 1 when no signatures matched; anything else is a failure.
    if completed.returncode not in (0, 1):
        return FormatVote(
            tool="binwalk",
            error=completed.stderr.strip() or f"exit code {completed.returncode}",
        )
    description = None
    for raw_line in completed.stdout.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        # Skip the table header ("DECIMAL ...") and its separator row.
        if line.upper().startswith("DECIMAL") or line.startswith("--"):
            continue
        # Rows are "<decimal> <hex> <description>": keep the description.
        columns = line.split(None, 2)
        if len(columns) == 3:
            description = columns[2]
            break
    if not description:
        return FormatVote(tool="binwalk", error="no signature match")
    extension = extension_from_description(description)
    return FormatVote(
        tool="binwalk",
        description=description,
        extension=extension,
        kind=kind_from_description(description) or kind_from_extension(extension),
    )
1796
+
1797
+
1798
def sanitize_path_string(path_str: str) -> str:
    """Clean and normalize path string, handling unicode and control characters.

    Args:
        path_str: Raw path string that may contain unicode, diacritics, or control characters

    Returns:
        Sanitized path string with normalized unicode and stripped control characters
    """
    # NOTE: `re` and `unicodedata` are imported at module top level; the
    # previous function-local re-imports were redundant and have been removed.

    # Remove leading/trailing whitespace
    cleaned = path_str.strip()

    # Strip control characters (U+0000 to U+001F and U+007F to U+009F)
    # but preserve path separators and valid unicode characters
    control_chars = "".join(chr(i) for i in range(0, 32)) + "".join(chr(i) for i in range(127, 160))
    cleaned = cleaned.translate(str.maketrans("", "", control_chars))

    # Normalize unicode to NFC (Canonical Decomposition, followed by Canonical Composition)
    # This handles diacritics and other language-specific characters consistently
    try:
        cleaned = unicodedata.normalize("NFC", cleaned)
    except (ValueError, TypeError) as e:
        # If normalization fails, try NFKC (compatibility normalization)
        try:
            cleaned = unicodedata.normalize("NFKC", cleaned)
        except (ValueError, TypeError):
            # If both fail, continue with the cleaned string
            LOG.warning("Unicode normalization failed for path: %s", e)

    # Remove any remaining invalid or problematic characters for file paths
    # Keep: letters, digits, spaces, and common path characters (. - _ / \\ :)
    # This is more permissive to allow international file names
    cleaned = re.sub(r'[<>"|?*\x00-\x1f\x7f-\x9f]', "", cleaned)

    # Final strip to remove any whitespace that may have been exposed
    cleaned = cleaned.strip()

    return cleaned
1839
+
1840
+
1841
def validate_path_argument(path_str: str) -> Path:
    """Validate and convert path string to Path object with comprehensive error checking.

    Args:
        path_str: Path string from command line argument

    Returns:
        Validated Path object (expanded and resolved to an absolute path)

    Raises:
        argparse.ArgumentTypeError: If path is invalid, doesn't exist, is empty,
            has permission issues, or is on an unmounted volume
    """
    # Sanitize the path string (strip controls, normalize unicode)
    cleaned_str = sanitize_path_string(path_str)

    if not cleaned_str:
        raise argparse.ArgumentTypeError("Path cannot be empty after sanitization")

    # Convert to Path object; expanduser handles "~", resolve makes it absolute
    try:
        path = Path(cleaned_str).expanduser().resolve()
    except (ValueError, RuntimeError, OSError) as e:
        raise argparse.ArgumentTypeError(f"Invalid path: {e}")

    # Check if path exists
    if not path.exists():
        # Distinguish "file deleted" from "volume not mounted" by checking
        # whether the parent directory exists.
        parent = path.parent
        if parent.exists():
            # Parent exists but file/dir doesn't - likely deleted/moved
            raise argparse.ArgumentTypeError(f"Path does not exist: {path}")
        else:
            # Parent doesn't exist - might be unmounted volume
            raise argparse.ArgumentTypeError(f"Path does not exist (unmounted volume or network path?): {path}")

    # Check if we have read permissions
    try:
        # For directories, try to list contents
        if path.is_dir():
            try:
                # Pulling one entry is enough to trigger a permission error.
                next(path.iterdir(), None)
            except PermissionError:
                raise argparse.ArgumentTypeError(f"Permission denied: Cannot read directory {path}")
            except OSError as e:
                raise argparse.ArgumentTypeError(f"Cannot access directory {path}: {e}")
        # For files, try to open and read
        else:
            try:
                # Check if file is readable
                with path.open("rb") as f:
                    # Try to read first byte to check if file is accessible
                    f.read(1)
            except PermissionError:
                raise argparse.ArgumentTypeError(f"Permission denied: Cannot read file {path}")
            except OSError as e:
                # Could be corrupt, on unmounted volume, or other I/O error
                raise argparse.ArgumentTypeError(f"Cannot read file {path}: {e}")

            # Check if file is empty (warn but don't fail - might be intentional for testing)
            if path.stat().st_size == 0:
                # Note: We don't raise an error here because empty files might be intentional
                # The CLI will handle this later in the processing pipeline
                LOG.warning(f"File is empty: {path}")

    except argparse.ArgumentTypeError:
        # Re-raise our custom errors
        raise
    except Exception as e:
        # Catch any other unexpected errors
        raise argparse.ArgumentTypeError(f"Error validating path {path}: {e}")

    return path
1914
+
1915
+
1916
def check_write_permission(directory: Path, operation_name: str = "write") -> None:
    """Check if we have write permissions in the given directory.

    Args:
        directory: Directory to check for write permissions
        operation_name: Description of the operation needing write access (for error messages)

    Raises:
        PermissionError: If directory is not writable with a clear error message
        OSError: If directory cannot be accessed for other reasons
    """
    # NOTE: the module-level ``import tempfile`` at the top of the file is
    # used here; the previous redundant function-local import was removed.
    if not directory.exists():
        raise OSError(f"Directory does not exist: {directory}")

    if not directory.is_dir():
        raise OSError(f"Path is not a directory: {directory}")

    # Probe write access by actually creating a temporary file (EAFP):
    # os.access() can be wrong for ACLs, read-only mounts, and network shares.
    try:
        with tempfile.NamedTemporaryFile(dir=directory, delete=True) as tmp:
            # Successfully created and can write
            tmp.write(b"test")
    except PermissionError as e:
        raise PermissionError(f"Permission denied: Cannot {operation_name} in directory {directory}\nPlease check that you have write permissions for this location.") from e
    except OSError as e:
        raise OSError(f"Cannot {operation_name} in directory {directory}: {e}") from e
1944
+
1945
+
1946
def parse_max_image_pixels(value: str) -> Optional[int]:
    """Parse a --max-image-pixels CLI value into a positive int, or None to disable."""
    token = value.strip().lower()
    # Any of these spellings turns the Pillow decompression-bomb guard off.
    if token in {"none", "disable", "disabled", "off", "0"}:
        return None
    try:
        parsed = int(token)
    except ValueError as exc:
        raise argparse.ArgumentTypeError("max image pixels must be a positive integer or 'none' to disable") from exc
    if parsed > 0:
        return parsed
    raise argparse.ArgumentTypeError("max image pixels must be a positive integer or 'none' to disable")
1957
+
1958
+
1959
def configure_pillow_max_image_pixels(max_image_pixels: Optional[int]) -> None:
    """Apply the configured ceiling to Pillow's decompression-bomb guard and log it."""
    Image.MAX_IMAGE_PIXELS = max_image_pixels
    if max_image_pixels is not None:
        LOG.info("Pillow MAX_IMAGE_PIXELS set to %s", max_image_pixels)
    else:
        LOG.info("Pillow decompression-bomb protection disabled.")
1965
+
1966
+
1967
def parse_args() -> argparse.Namespace:
    """Build the CLI parser, parse sys.argv, and apply environment overrides."""
    parser = argparse.ArgumentParser(
        prog="smart-media-manager",
        description="Scan and import media into Apple Photos, fixing extensions and compatibility.",
        epilog="Examples:\n %(prog)s /path/to/media --recursive\n %(prog)s /path/to/image.jpg\n %(prog)s # scans current directory",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=None,  # None (not Path.cwd()) so the fallback is applied after parsing
        type=validate_path_argument,  # validates existence/permissions up front
        metavar="PATH",
        help="Directory to scan (default: current directory) or path to a single file",
    )
    parser.add_argument(
        "--delete",
        action="store_true",
        help="Delete the temporary FOUND_MEDIA_FILES_<timestamp> folder after a successful import.",
    )
    parser.add_argument(
        "--recursive",
        action="store_true",
        help="Recursively scan the folder for media files.",
    )
    parser.add_argument(
        "--follow-symlinks",
        action="store_true",
        help="Follow symbolic links when scanning.",
    )
    parser.add_argument(
        "--skip-bootstrap",
        action="store_true",
        help="Skip automatic dependency installation (requires prerequisites already installed).",
    )
    parser.add_argument(
        "--skip-convert",
        action="store_true",
        help="Skip format conversion/transcoding. Files must already be Photos-compatible. Useful for testing raw compatibility.",
    )
    parser.add_argument(
        "--skip-compatibility-check",
        action="store_true",
        help="Skip all compatibility validation checks. ⚠️ WARNING: May cause Photos import errors! Use only for format testing.",
    )
    parser.add_argument(
        "--max-image-pixels",
        type=parse_max_image_pixels,
        default=MAX_IMAGE_PIXELS_UNSET,  # sentinel so an env override can be detected
        help="Set Pillow image pixel limit; use 'none' to disable (default: none).",
    )
    parser.add_argument(
        "--album",
        type=str,
        default="Smart Media Manager",
        help="Photos album name to import into (default: 'Smart Media Manager').",
    )
    parser.add_argument(
        "--skip-duplicate-check",
        action="store_true",
        default=False,
        help="Skip duplicate checking during import (faster but may import duplicates). Default: check for duplicates and prompt user.",
    )
    parser.add_argument(
        "--copy",
        dest="copy_mode",
        action="store_true",
        help="Copy files into staging instead of moving them (originals are left untouched).",
    )
    parser.add_argument(
        "-y",
        "--yes",
        "--assume-yes",
        dest="assume_yes",
        action="store_true",
        help="Skip confirmation prompt before scanning. Useful for automation and tests.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
        help="Show the smart-media-manager version and exit.",
    )

    args = parser.parse_args()

    # SMART_MEDIA_MANAGER_ASSUME_YES lets CI/tests bypass the interactive prompt.
    if not args.assume_yes:
        env_value = os.environ.get("SMART_MEDIA_MANAGER_ASSUME_YES")
        if env_value and env_value.strip().lower() not in {"0", "false", "no"}:
            args.assume_yes = True

    # Only consult the environment when the CLI flag was left at its sentinel.
    if args.max_image_pixels is MAX_IMAGE_PIXELS_UNSET:
        env_pixels = os.environ.get("SMART_MEDIA_MANAGER_MAX_IMAGE_PIXELS")
        if env_pixels and env_pixels.strip():
            try:
                args.max_image_pixels = parse_max_image_pixels(env_pixels)
            except argparse.ArgumentTypeError as exc:
                parser.error(f"Invalid SMART_MEDIA_MANAGER_MAX_IMAGE_PIXELS: {exc}")
        else:
            args.max_image_pixels = None

    # No positional path given: fall back to the current working directory.
    if args.path is None:
        args.path = Path.cwd()

    # Copy mode implies the user wants originals kept; skip the prompt.
    if args.copy_mode:
        args.assume_yes = True

    return args
2078
+
2079
+
2080
def ensure_dependency(name: str) -> None:
    """Raise RuntimeError when the external tool *name* cannot be located on PATH."""
    located = shutil.which(name)
    if located is None:
        raise RuntimeError(f"Required dependency '{name}' is not available on PATH.")
2083
+
2084
+
2085
def ffprobe(path: Path) -> Optional[dict[str, Any]]:
    """Run ffprobe on *path*; return its parsed JSON output, or None on any failure."""
    command = [
        "ffprobe",
        "-v",
        "error",
        "-print_format",
        "json",
        "-show_streams",
        "-show_format",
        str(path),
    ]
    # check=False: a non-zero exit (unreadable file) is an expected outcome here.
    completed = subprocess.run(command, capture_output=True, text=True, check=False)
    if completed.returncode != 0:
        return None
    try:
        return json.loads(completed.stdout)  # type: ignore[no-any-return]
    except json.JSONDecodeError:
        return None
2108
+
2109
+
2110
def extract_and_normalize_metadata(probe_data: dict[str, Any]) -> dict[str, Any]:
    """
    Extract metadata from ffprobe JSON and normalize field names to UUIDs.

    Collects tags from the format section and from every stream (format-level
    values win on key collisions), lower-cases the tag keys, and runs the
    result through the metadata registry's ffprobe translation table.

    Args:
        probe_data: FFprobe JSON output with 'format' and 'streams' keys

    Returns:
        Dictionary with UUID keys mapping to metadata values

    Example:
        >>> probe = {"format": {"tags": {"creation_time": "2024-01-15"}}}
        >>> metadata = extract_and_normalize_metadata(probe)
        >>> # Returns: {'3d4f8a9c-1e7b-5c3d-9a2f-4e8c1b7d3a9f-M': '2024-01-15'}
    """
    raw_metadata: dict[str, Any] = {}

    # Format-level tags (creation_time, artist, title, ...). FFprobe tag keys
    # can be mixed case, so they are stored lower-cased for consistency.
    format_tags = probe_data.get("format", {}).get("tags") or {}
    for tag_key, tag_value in format_tags.items():
        raw_metadata[tag_key.lower()] = tag_value

    # Stream-level tags: setdefault keeps format-level values authoritative.
    for stream in probe_data.get("streams", []):
        for tag_key, tag_value in (stream.get("tags") or {}).items():
            raw_metadata.setdefault(tag_key.lower(), tag_value)

    if not raw_metadata:
        return {}

    # Translate ffprobe field names to canonical UUIDs.
    normalized = metadata_registry.normalize_metadata_dict("ffprobe", raw_metadata)
    LOG.debug(f"Extracted and normalized {len(normalized)} metadata fields from ffprobe")
    return normalized
2158
+
2159
+
2160
def is_video_corrupt_or_truncated(path: Path) -> tuple[bool, Optional[str]]:
    """
    FAST corruption detection for video files (<1 second for most files).

    Strategy: Decode first 5 seconds with error detection enabled.
    This catches 99% of corruption while being very fast.

    For truncated files: The corruption usually manifests early when
    decoder hits missing/invalid data, even if file claims full duration.

    Returns:
        (is_corrupt, reason): reason is None when the file passes all checks,
        otherwise a short human-readable diagnostic string.
    """
    # Quick check: can ffprobe read the file?
    probe = ffprobe(path)
    if probe is None:
        return True, "ffprobe cannot read file"

    # Check for streams
    streams = probe.get("streams", [])
    if not streams:
        return True, "no streams found"

    # Check for video stream
    has_video = any(s.get("codec_type") == "video" for s in streams)
    if not has_video:
        return True, "no video stream found"

    # Check format info
    format_info = probe.get("format", {})
    if not format_info:
        return True, "no format information"

    # Check duration (ffprobe reports it as a string; 0/negative means broken)
    try:
        duration = float(format_info.get("duration", 0))
        if duration <= 0:
            return True, "invalid or missing duration"
    except (ValueError, TypeError):
        return True, "cannot parse duration"

    # FAST CHECK: Decode first 5 seconds with explode on errors
    # This is MUCH faster than full decode but catches most corruption
    # Timeout after 5 seconds to prevent hanging
    cmd = [
        "ffmpeg",
        "-v",
        "error",
        "-err_detect",
        "explode",  # Exit on first error
        "-t",
        "5",  # Only decode first 5 seconds
        "-i",
        str(path),
        "-vframes",
        "60",  # Max 60 frames (2.5s at 24fps)
        "-f",
        "null",
        "-",
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
    except subprocess.TimeoutExpired:
        return True, "validation timeout - likely corrupted or very slow codec"

    # CRITICAL: Check stderr REGARDLESS of exit code!
    # ffmpeg returns 0 even when it detects corruption
    stderr = result.stderr.lower() if result.stderr else ""

    # Substrings (matched against lower-cased stderr) that signal corruption.
    corruption_indicators = [
        "partial file",
        "invalid nal",
        "invalid data",
        "decoding error",
        "error splitting",
        "corrupt",
        "truncat",
        "moov atom not found",
        "incomplete",
        "unexpected end",
        "end of file",
        "premature end",
        "failed to decode",
        "invalid bitstream",
        "error decoding",
    ]

    for indicator in corruption_indicators:
        if indicator in stderr:
            return True, f"corruption detected: {stderr[:200]}"

    # Also check return code for fatal errors
    if result.returncode != 0:
        return True, f"decode failed: {stderr[:200]}"

    # ADDITIONAL CHECK: For longer videos, check near the end too
    # This catches truncation that doesn't manifest in first 5s
    if duration > 10:
        # Try to seek near end and decode a few frames
        seek_time = max(0, duration - 2)
        cmd_end = [
            "ffmpeg",
            "-v",
            "error",
            "-ss",
            str(seek_time),
            "-i",
            str(path),
            "-vframes",
            "5",
            "-f",
            "null",
            "-",
        ]

        try:
            result_end = subprocess.run(cmd_end, capture_output=True, text=True, timeout=3)
            stderr_end = result_end.stderr.lower() if result_end.stderr else ""

            for indicator in corruption_indicators:
                if indicator in stderr_end:
                    return True, f"truncated at end: {result_end.stderr[:150]}"
        except subprocess.TimeoutExpired:
            # End-check timeout is acceptable for very large files
            pass

    return False, None
2285
+
2286
+
2287
def extract_container(format_name: str) -> str:
    """Return the primary (first-listed) container token from an ffprobe format_name, lower-cased."""
    first_token, _, _ = format_name.partition(",")
    return first_token.strip().lower()
2289
+
2290
+
2291
def is_skippable_file(path: Path) -> Optional[str]:
    """Return a human-readable skip reason for *path*, or None if it should be scanned."""
    # Zero-byte files carry no media payload; a failed stat is also a skip.
    try:
        size = path.stat().st_size
    except OSError as exc:
        return f"stat failed: {exc.strerror or exc.args[0]}"
    if size == 0:
        return "file is empty"

    # Confirm the file is actually readable by pulling one byte; hint the
    # kernel that access will be random so sequential readahead is not wasted.
    try:
        with path.open("rb") as stream:
            with suppress(AttributeError, OSError):
                os.posix_fadvise(stream.fileno(), 0, 0, os.POSIX_FADV_RANDOM)  # type: ignore[attr-defined]
            stream.read(1)
    except PermissionError as exc:
        return f"permission denied: {exc.filename or path}"
    except OSError as exc:
        return f"io error: {exc.strerror or exc.args[0]}"

    # Cheap extension-based text heuristic before the full binary sniff.
    if path.suffix.lower() in TEXT_ONLY_HINT_EXTENSIONS and looks_like_text_file(path):
        return "text file"

    try:
        binary = is_binary_file(str(path))
    except Exception as exc:  # noqa: BLE001
        return f"binary check failed: {exc}"
    if not binary:
        return "text file"

    return None
2319
+
2320
+
2321
+ def detect_media(path: Path, skip_compatibility_check: bool = False) -> tuple[Optional[MediaFile], Optional[str]]:
2322
+ filetype_signature = safe_filetype_guess(path)
2323
+ puremagic_signature = safe_puremagic_guess(path)
2324
+ signatures = [filetype_signature, puremagic_signature]
2325
+
2326
+ if any(is_archive_signature(sig) for sig in signatures):
2327
+ return None, "non-media: archive file"
2328
+
2329
+ if any(is_textual_mime(sig.mime) for sig in signatures):
2330
+ return None, "non-media: text file"
2331
+
2332
+ votes = collect_format_votes(path, puremagic_signature)
2333
+ consensus = select_consensus_vote(votes)
2334
+ if not consensus:
2335
+ return None, votes_error_summary(votes)
2336
+
2337
+ # UUID-based format detection for early filtering
2338
+ tool_results = {}
2339
+ for vote in votes:
2340
+ if vote.tool and not vote.error:
2341
+ # Collect tool outputs for UUID lookup
2342
+ if vote.description:
2343
+ tool_results[vote.tool] = vote.description
2344
+ elif vote.mime:
2345
+ tool_results[vote.tool] = vote.mime
2346
+
2347
+ # Try UUID-based detection
2348
+ detected_uuid = format_registry.format_detection_result(tool_results) if tool_results else None
2349
+ uuid_compatible = format_registry.is_apple_photos_compatible(detected_uuid) if detected_uuid else None
2350
+ uuid_canonical_name = format_registry.get_canonical_name(detected_uuid) if detected_uuid else None
2351
+
2352
+ # Register any tool outputs that lack a mapping to help expand the registry
2353
+ if tool_results:
2354
+ suffix = path.suffix.lower() if path.suffix else ""
2355
+
2356
+ def infer_kind() -> str:
2357
+ if is_image_signature(Signature(extension=suffix)) or any(is_image_signature(sig) for sig in signatures):
2358
+ return "image"
2359
+ if is_video_signature(Signature(extension=suffix)) or any(is_video_signature(sig) for sig in signatures):
2360
+ return "video"
2361
+ return "container"
2362
+
2363
+ for tool_name, token in tool_results.items():
2364
+ if not token:
2365
+ continue
2366
+ if format_registry.lookup_format_uuid(tool_name, token) is None:
2367
+ UNKNOWN_MAPPINGS.register(tool_name, token, infer_kind(), path)
2368
+
2369
+ # Log UUID detection for debugging
2370
+ if detected_uuid:
2371
+ LOG.debug(f"UUID detection for {path.name}: uuid={detected_uuid}, canonical={uuid_canonical_name}, compatible={uuid_compatible}")
2372
+
2373
+ detected_kind = determine_media_kind(votes, consensus)
2374
+ if detected_kind not in {"image", "video", "raw"}:
2375
+ reason = consensus.mime or consensus.description or votes_error_summary(votes)
2376
+ if reason:
2377
+ return None, f"non-media: {reason}"
2378
+ return None, "non-media: unidentified format"
2379
+ size_bytes = None
2380
+ try:
2381
+ size_bytes = path.stat().st_size
2382
+ except OSError:
2383
+ size_bytes = None
2384
+
2385
+ suffix = path.suffix.lower() if path.suffix else ""
2386
+
2387
+ animated = False
2388
+ if suffix in {".gif"}:
2389
+ animated = is_animated_gif(path)
2390
+ elif suffix in {".png"}:
2391
+ animated = is_animated_png(path)
2392
+ elif suffix in {".webp"}:
2393
+ animated = is_animated_webp(path)
2394
+
2395
+ psd_color_mode = get_psd_color_mode(path) if suffix == ".psd" else None
2396
+ if suffix == ".psd" and not psd_color_mode:
2397
+ psd_color_mode = "unknown"
2398
+
2399
+ def vote_for(tool: str) -> Optional[FormatVote]:
2400
+ for vote in votes:
2401
+ if vote.tool == tool:
2402
+ return vote
2403
+ return None
2404
+
2405
+ libmagic_vote = vote_for("libmagic")
2406
+ puremagic_vote = vote_for("puremagic")
2407
+ pyfsig_vote = vote_for("pyfsig")
2408
+ binwalk_vote = vote_for("binwalk")
2409
+
2410
+ libmagic_values = [val for val in (libmagic_vote.mime, libmagic_vote.description) if val] if libmagic_vote else []
2411
+ puremagic_values: list[str] = []
2412
+ if puremagic_vote:
2413
+ if puremagic_vote.mime:
2414
+ puremagic_values.append(puremagic_vote.mime)
2415
+ if puremagic_vote.extension:
2416
+ puremagic_values.append(puremagic_vote.extension)
2417
+ if puremagic_vote.extension.startswith("."):
2418
+ puremagic_values.append(puremagic_vote.extension.lstrip("."))
2419
+ if puremagic_vote.description:
2420
+ puremagic_values.append(puremagic_vote.description)
2421
+ pyfsig_values: list[str] = []
2422
+ if pyfsig_vote:
2423
+ if pyfsig_vote.description:
2424
+ pyfsig_values.append(pyfsig_vote.description)
2425
+ if pyfsig_vote.extension:
2426
+ pyfsig_values.append(pyfsig_vote.extension)
2427
+ if pyfsig_vote.extension.startswith("."):
2428
+ pyfsig_values.append(pyfsig_vote.extension.lstrip("."))
2429
+ binwalk_values = [binwalk_vote.description] if binwalk_vote and binwalk_vote.description else []
2430
+
2431
+ video_codec = None
2432
+ audio_codec = None
2433
+ audio_channels = None
2434
+ audio_layout = None
2435
+ container = None
2436
+ ffprobe_tokens: list[str] = []
2437
+ # Format parameters for expanded UUID generation
2438
+ video_bit_depth = None
2439
+ video_pix_fmt = None
2440
+ video_profile = None
2441
+ audio_sample_rate = None
2442
+ audio_sample_fmt = None
2443
+ # Initialize UUID variables for all file types (not just videos)
2444
+ video_codec_uuid = None
2445
+ audio_codec_uuid = None
2446
+
2447
+ if detected_kind == "video":
2448
+ # Check for corruption before further processing
2449
+ is_corrupt, corrupt_reason = is_video_corrupt_or_truncated(path)
2450
+ if is_corrupt:
2451
+ return None, f"corrupt or truncated video: {corrupt_reason}"
2452
+
2453
+ probe = ffprobe(path)
2454
+ if not probe:
2455
+ return None, "video probe failed"
2456
+
2457
+ # Extract and normalize metadata fields using UUID translation layer
2458
+ # This converts ffprobe field names (creation_time, artist, etc.) to UUIDs
2459
+ normalized_metadata = extract_and_normalize_metadata(probe)
2460
+
2461
+ streams = probe.get("streams", [])
2462
+ format_info = probe.get("format", {})
2463
+ format_name = format_info.get("format_name", "").lower()
2464
+ if not format_name:
2465
+ return None, "unsupported video container"
2466
+ container = extract_container(format_name)
2467
+ for stream in streams:
2468
+ codec_type = stream.get("codec_type")
2469
+ if codec_type == "video" and not video_codec:
2470
+ video_codec = (stream.get("codec_name") or "").lower() or None
2471
+ # Extract format parameters for expanded UUID generation
2472
+ video_bit_depth = stream.get("bits_per_raw_sample") # Bit depth (8, 10, 12, 16)
2473
+ if not video_bit_depth:
2474
+ # Fallback: try bits_per_component or pix_fmt parsing
2475
+ video_bit_depth = stream.get("bits_per_component")
2476
+ video_pix_fmt = stream.get("pix_fmt") # Pixel format (yuv420p, yuv422p, etc.)
2477
+ video_profile = stream.get("profile") # Profile (High, Main, Main 10, etc.)
2478
+ elif codec_type == "audio" and not audio_codec:
2479
+ audio_codec = (stream.get("codec_name") or "").lower() or None
2480
+ audio_channels = stream.get("channels")
2481
+ audio_layout = stream.get("channel_layout")
2482
+ # Extract audio format parameters
2483
+ sample_rate_val = stream.get("sample_rate")
2484
+ try:
2485
+ audio_sample_rate = int(sample_rate_val) if sample_rate_val is not None else None
2486
+ except (TypeError, ValueError):
2487
+ audio_sample_rate = None
2488
+ audio_sample_fmt = stream.get("sample_fmt")
2489
+ if container:
2490
+ ffprobe_tokens.append(f"container:{container}")
2491
+ if video_codec:
2492
+ ffprobe_tokens.append(f"video:{video_codec}")
2493
+ if audio_codec:
2494
+ ffprobe_tokens.append(f"audio:{audio_codec}")
2495
+
2496
+ # Generate expanded UUID for video codec with format parameters
2497
+ # This provides granular format identification (e.g., H.264 8-bit vs 10-bit)
2498
+ # IMPORTANT: Use the translation layer to get the base codec UUID
2499
+ if video_codec:
2500
+ try:
2501
+ # Translate ffprobe codec name to base UUID using the unified translation layer
2502
+ base_codec_uuid = format_registry.lookup_format_uuid("ffprobe", video_codec)
2503
+ if base_codec_uuid:
2504
+ # Extract the base UUID (everything before the type suffix)
2505
+ # E.g., "b2e62c4a-6122-548c-9bfa-0fcf3613942a-V" → "b2e62c4a-6122-548c-9bfa-0fcf3613942a"
2506
+ base_uuid_parts = base_codec_uuid.split("-")
2507
+ if len(base_uuid_parts) >= 5:
2508
+ base_uuid = "-".join(base_uuid_parts[:5])
2509
+
2510
+ # Convert bit_depth to int if it's a string
2511
+ bit_depth_int = None
2512
+ if video_bit_depth:
2513
+ bit_depth_int = int(video_bit_depth) if isinstance(video_bit_depth, str) else video_bit_depth
2514
+
2515
+ # Build expanded UUID with format parameters
2516
+ # Start with base UUID, append parameters, then type suffix
2517
+ params = []
2518
+ if bit_depth_int:
2519
+ params.append(f"{bit_depth_int}bit")
2520
+ if video_pix_fmt:
2521
+ params.append(video_pix_fmt)
2522
+ if video_profile:
2523
+ params.append(video_profile.lower())
2524
+
2525
+ if params:
2526
+ param_suffix = "-".join(params)
2527
+ video_codec_uuid = f"{base_uuid}-{param_suffix}-V"
2528
+ else:
2529
+ # No parameters, use base UUID with type suffix
2530
+ video_codec_uuid = base_codec_uuid
2531
+
2532
+ LOG.debug(f"Generated expanded video codec UUID for {path.name}: {video_codec_uuid} (base={base_uuid}, codec={video_codec}, bit_depth={bit_depth_int}, pix_fmt={video_pix_fmt}, profile={video_profile})")
2533
+ else:
2534
+ LOG.warning(f"Base codec UUID has unexpected format for {path.name}: {base_codec_uuid}")
2535
+ video_codec_uuid = base_codec_uuid # Use as-is
2536
+ else:
2537
+ UNKNOWN_MAPPINGS.register("ffprobe", video_codec, "video", path)
2538
+ LOG.info("No UUID mapping found for ffprobe codec '%s' for %s", video_codec, path.name)
2539
+ except Exception as e:
2540
+ LOG.warning(f"Failed to generate expanded video codec UUID for {path.name}: {e}")
2541
+ # Fall back to base UUID without parameters
2542
+ video_codec_uuid = None
2543
+
2544
+ # Generate expanded UUID for audio codec with format parameters
2545
+ # This provides granular format identification (e.g., AAC 48kHz vs 6kHz)
2546
+ # IMPORTANT: Use the translation layer to get the base codec UUID
2547
+ audio_codec_uuid = None
2548
+ if audio_codec:
2549
+ try:
2550
+ # Translate ffprobe codec name to base UUID using the unified translation layer
2551
+ base_audio_uuid = format_registry.lookup_format_uuid("ffprobe", audio_codec)
2552
+ if base_audio_uuid:
2553
+ # Extract the base UUID (everything before the type suffix)
2554
+ # E.g., "501331ba-42ea-561c-e5df-8a824df17e3f-A" → "501331ba-42ea-561c-e5df-8a824df17e3f"
2555
+ base_uuid_parts = base_audio_uuid.split("-")
2556
+ if len(base_uuid_parts) >= 5:
2557
+ base_uuid = "-".join(base_uuid_parts[:5])
2558
+
2559
+ # Build expanded UUID with format parameters
2560
+ # Start with base UUID, append parameters, then type suffix
2561
+ params = []
2562
+ if audio_sample_rate:
2563
+ params.append(str(audio_sample_rate))
2564
+ if audio_sample_fmt:
2565
+ params.append(audio_sample_fmt)
2566
+
2567
+ if params:
2568
+ param_suffix = "-".join(params)
2569
+ audio_codec_uuid = f"{base_uuid}-{param_suffix}-A"
2570
+ else:
2571
+ # No parameters, use base UUID with type suffix
2572
+ audio_codec_uuid = base_audio_uuid
2573
+
2574
+ LOG.debug(f"Generated expanded audio codec UUID for {path.name}: {audio_codec_uuid} (base={base_uuid}, codec={audio_codec}, sample_rate={audio_sample_rate}, sample_fmt={audio_sample_fmt})")
2575
+ else:
2576
+ LOG.warning(f"Base audio codec UUID has unexpected format for {path.name}: {base_audio_uuid}")
2577
+ audio_codec_uuid = base_audio_uuid # Use as-is
2578
+ else:
2579
+ UNKNOWN_MAPPINGS.register("ffprobe", audio_codec, "audio", path)
2580
+ LOG.info("No UUID mapping found for ffprobe audio codec '%s' for %s", audio_codec, path.name)
2581
+ except Exception as e:
2582
+ LOG.warning(f"Failed to generate expanded audio codec UUID for {path.name}: {e}")
2583
+ # Fall back to None
2584
+ audio_codec_uuid = None
2585
+
2586
+ extension_candidates: list[Optional[str]] = []
2587
+ if consensus:
2588
+ consensus_ext = canonicalize_extension(consensus.extension) # Apply canonicalization to detected extension
2589
+ if consensus_ext:
2590
+ extension_candidates.append(consensus_ext)
2591
+ suffix_ext = canonicalize_extension(path.suffix) # Apply canonicalization to file suffix
2592
+ if suffix_ext and suffix_ext not in extension_candidates:
2593
+ extension_candidates.append(suffix_ext)
2594
+ extension_candidates.append(None)
2595
+
2596
+ rule: Optional[FormatRule] = None
2597
+ for candidate in extension_candidates:
2598
+ rule = match_rule(
2599
+ extension=candidate,
2600
+ libmagic=libmagic_values,
2601
+ puremagic=puremagic_values,
2602
+ pyfsig=pyfsig_values,
2603
+ binwalk=binwalk_values,
2604
+ rawpy=None,
2605
+ ffprobe_streams=ffprobe_tokens,
2606
+ animated=animated,
2607
+ size_bytes=size_bytes,
2608
+ psd_color_mode=psd_color_mode,
2609
+ )
2610
+ if rule:
2611
+ break
2612
+
2613
+ # CRITICAL: JSON file is the SOLE source of truth for format identification
2614
+ # If UUID detection fails, the file is unidentified and must be rejected
2615
+ if not detected_uuid:
2616
+ LOG.debug(f"UUID detection failed for {path.name} - file not identified")
2617
+ return None, "non-media: format not identified by UUID system"
2618
+
2619
+ # For video files, pass both container UUID and video codec UUID
2620
+ # This provides granular format identification (e.g., H.264 8-bit vs 10-bit)
2621
+ # while also checking container compatibility (MP4/MOV vs MKV)
2622
+ primary_uuid = detected_uuid
2623
+ container_uuid_param = None
2624
+ if detected_kind == "video" and "video_codec_uuid" in locals() and video_codec_uuid:
2625
+ # Use expanded video codec UUID as primary, pass container UUID separately
2626
+ primary_uuid = video_codec_uuid
2627
+ container_uuid_param = detected_uuid
2628
+ LOG.debug(f"Using expanded video codec UUID for {path.name}: {video_codec_uuid} (container UUID: {detected_uuid})")
2629
+
2630
+ # UUID detected - determine action from JSON
2631
+ # Pass audio_codec_uuid instead of audio_codec to use UUID-based compatibility checking
2632
+ uuid_action = format_registry.get_format_action(primary_uuid, video_codec, audio_codec_uuid, container_uuid_param)
2633
+ if not uuid_action:
2634
+ # UUID identified but format is unsupported
2635
+ LOG.debug(f"UUID {primary_uuid} identified but unsupported for {path.name}")
2636
+ return None, f"non-media: unsupported format (UUID={primary_uuid})"
2637
+
2638
+ # UUID system says this format is supported - use its action
2639
+ LOG.debug(f"UUID-based action for {path.name}: {uuid_action} (UUID={primary_uuid}, container={container_uuid_param})")
2640
+
2641
+ # JSON is the sole source of truth - we already have uuid_action from above
2642
+ # Keep rule for metadata only (rule_id, notes, extensions for legacy compatibility)
2643
+ if not rule:
2644
+ # No rule found - but UUID system already approved it, so create a minimal rule
2645
+ # This shouldn't happen often as most formats should have rules
2646
+ LOG.warning(f"UUID {detected_uuid} approved but no format rule found for {path.name}")
2647
+ return None, f"no format rule found for detected UUID {detected_uuid}"
2648
+
2649
+ # Use uuid_action as the effective action (JSON is authoritative)
2650
+ effective_action = uuid_action
2651
+
2652
+ if rule.category == "vector":
2653
+ return None, "vector formats are not supported by Apple Photos"
2654
+
2655
+ metadata: dict[str, Any] = {
2656
+ "rule_conditions": rule.conditions,
2657
+ "rule_notes": rule.notes,
2658
+ "detected_uuid": detected_uuid,
2659
+ "uuid_canonical_name": uuid_canonical_name,
2660
+ "uuid_compatible": uuid_compatible,
2661
+ }
2662
+
2663
+ if rule.category == "raw":
2664
+ raw_extensions = [path.suffix] + list(rule.extensions)
2665
+ install_raw_dependency_groups(collect_raw_groups_from_extensions(raw_extensions))
2666
+ raw_media, raw_reason = refine_raw_media(path, raw_extensions)
2667
+ if not raw_media:
2668
+ return None, raw_reason or "unsupported raw format"
2669
+ raw_media.rule_id = rule.rule_id
2670
+ raw_media.action = effective_action
2671
+ raw_media.requires_processing = effective_action != "import"
2672
+ raw_media.notes = rule.notes
2673
+ raw_media.metadata.update(metadata)
2674
+ return raw_media, None
2675
+
2676
+ original_extension = canonicalize_extension(path.suffix) # Apply canonicalization
2677
+ consensus_extension = canonicalize_extension(consensus.extension) if consensus else None # Apply canonicalization
2678
+ preferred_extension = canonicalize_extension(rule.extensions[0]) if rule.extensions else None # Apply canonicalization
2679
+
2680
+ # NEVER change extension unless format detected differs from file extension
2681
+ # Priority: always keep original if valid, only use detected format if no extension or wrong extension
2682
+ if original_extension and rule.extensions and original_extension in rule.extensions:
2683
+ # Original extension is valid for the detected format - keep it!
2684
+ extension = original_extension
2685
+ elif original_extension:
2686
+ # File has extension but it doesn't match detected format - use detected format
2687
+ extension = consensus_extension or preferred_extension or original_extension or ".media"
2688
+ else:
2689
+ # File has no extension - use detected format
2690
+ extension = consensus_extension or preferred_extension or ".media"
2691
+ if detected_kind == "image":
2692
+ media = MediaFile(
2693
+ source=path,
2694
+ kind="image",
2695
+ extension=extension or ".img",
2696
+ format_name=(extension or ".img").lstrip("."),
2697
+ compatible=effective_action == "import",
2698
+ original_suffix=path.suffix,
2699
+ rule_id=rule.rule_id,
2700
+ action=effective_action,
2701
+ requires_processing=effective_action != "import",
2702
+ notes=rule.notes,
2703
+ metadata=metadata,
2704
+ )
2705
+ media.detected_compatible = media.compatible
2706
+ media.metadata.update(
2707
+ {
2708
+ "animated": animated,
2709
+ "size_bytes": size_bytes,
2710
+ "psd_color_mode": psd_color_mode,
2711
+ }
2712
+ )
2713
+ refined_media, refine_reason = refine_image_media(media, skip_compatibility_check)
2714
+ if refined_media is None:
2715
+ return None, refine_reason or "image validation failed"
2716
+ return refined_media, None
2717
+
2718
+ if detected_kind == "video":
2719
+ media = MediaFile(
2720
+ source=path,
2721
+ kind="video",
2722
+ extension=extension or ".mp4",
2723
+ format_name=container or "video",
2724
+ compatible=effective_action == "import",
2725
+ video_codec=video_codec,
2726
+ audio_codec=audio_codec,
2727
+ audio_sample_rate=audio_sample_rate,
2728
+ audio_sample_fmt=audio_sample_fmt,
2729
+ original_suffix=path.suffix,
2730
+ rule_id=rule.rule_id,
2731
+ action=effective_action,
2732
+ requires_processing=effective_action != "import",
2733
+ notes=rule.notes,
2734
+ metadata=metadata,
2735
+ )
2736
+ media.detected_compatible = media.compatible
2737
+ media.metadata.update(
2738
+ {
2739
+ "container": container,
2740
+ "size_bytes": size_bytes,
2741
+ "audio_channels": audio_channels,
2742
+ "audio_layout": audio_layout,
2743
+ "audio_sample_rate": audio_sample_rate,
2744
+ "audio_sample_fmt": audio_sample_fmt,
2745
+ }
2746
+ )
2747
+ # Add normalized metadata from ffprobe (UUID-keyed fields)
2748
+ # This includes creation_time, artist, title, etc. with UUID keys
2749
+ if normalized_metadata:
2750
+ media.metadata.update(normalized_metadata)
2751
+ refined_media, refine_reason = refine_video_media(media, skip_compatibility_check)
2752
+ if refined_media is None:
2753
+ return None, refine_reason or "video validation failed"
2754
+ return refined_media, None
2755
+
2756
+ return None, "unsupported format"
2757
+
2758
+
2759
def safe_filetype_guess(path: Path) -> Signature:
    """Probe *path* with the ``filetype`` library without ever raising.

    Returns an empty Signature when detection fails or yields nothing;
    otherwise a Signature with a normalized extension and lowercased MIME.
    """
    try:
        result = filetype.guess(str(path))
    except Exception:  # noqa: BLE001 - detection must never abort the scan
        result = None
    if not result:
        return Signature()
    mime_value = result.mime.lower() if result.mime else None
    return Signature(extension=normalize_extension(result.extension), mime=mime_value)
2769
+
2770
+
2771
def safe_puremagic_guess(path: Path) -> Signature:
    """Probe *path* with puremagic without ever raising.

    Extension and MIME are detected independently; either field may be None
    when its probe fails (puremagic.PureError or any other exception).
    """
    extension: Optional[str] = None
    mime: Optional[str] = None
    try:
        extension = normalize_extension(puremagic.from_file(str(path)))
    except Exception:  # noqa: BLE001 - includes puremagic.PureError
        extension = None
    try:
        raw_mime = puremagic.from_file(str(path), mime=True)
        mime = raw_mime.lower() if raw_mime else None
    except Exception:  # noqa: BLE001 - includes puremagic.PureError
        mime = None
    return Signature(extension=extension, mime=mime)
2788
+
2789
+
2790
def canonical_image_extension(name: Optional[str]) -> Optional[str]:
    """Map an extension-like name to its canonical image extension, if known."""
    if not name:
        return None
    return IMAGE_EXTENSION_MAP.get(name.lower().lstrip("."))
2795
+
2796
+
2797
def canonical_video_extension(name: Optional[str]) -> Optional[str]:
    """Map an extension-like name to its canonical video extension, if known."""
    normalized = normalize_extension(name)
    return VIDEO_EXTENSION_MAP.get(normalized) if normalized else None
2802
+
2803
+
2804
def is_archive_signature(sig: Signature) -> bool:
    """Return True when the signature marks a known archive format."""
    if not sig or sig.is_empty():
        return False
    by_extension = bool(sig.extension) and sig.extension in ARCHIVE_EXTENSIONS
    by_mime = bool(sig.mime) and sig.mime in ARCHIVE_MIME_TYPES
    return by_extension or by_mime
2812
+
2813
+
2814
def is_image_signature(sig: Signature) -> bool:
    """Return True when the signature looks like an image (MIME or extension)."""
    if not sig or sig.is_empty():
        return False
    if sig.mime and sig.mime.startswith("image/"):
        return True
    return bool(sig.extension) and sig.extension in ALL_IMAGE_EXTENSIONS
2822
+
2823
+
2824
def is_video_signature(sig: Signature) -> bool:
    """Return True when the signature looks like a video (MIME or extension)."""
    if not sig or sig.is_empty():
        return False
    if sig.mime and sig.mime.startswith("video/"):
        return True
    return bool(sig.extension) and sig.extension in VIDEO_EXTENSION_HINTS
2832
+
2833
+
2834
def choose_image_extension(signatures: Iterable[Signature]) -> Optional[str]:
    """Pick the best canonical image extension from detection signatures.

    Extension-derived matches win over MIME-derived ones. The iterable is
    materialized first: the previous implementation iterated it twice, which
    silently skipped the MIME pass whenever a generator was passed in.

    Args:
        signatures: Detection signatures (e.g. from filetype/puremagic).

    Returns:
        A dotted extension such as ".jpg", or None when nothing matches.
    """
    sigs = list(signatures)  # survive a single-use iterator for both passes
    for sig in sigs:
        ext = canonical_image_extension(sig.extension)
        if ext:
            return ext
    for sig in sigs:
        if sig.mime:
            mapped = IMAGE_MIME_EXTENSION_MAP.get(sig.mime)
            if mapped:
                return mapped
    return None
2845
+
2846
+
2847
def choose_video_extension(signatures: Iterable[Signature]) -> Optional[str]:
    """Pick the best canonical video extension from detection signatures.

    Extension-derived matches win over MIME-derived ones. The iterable is
    materialized first: the previous implementation iterated it twice, which
    silently skipped the MIME pass whenever a generator was passed in.

    Args:
        signatures: Detection signatures (e.g. from filetype/puremagic).

    Returns:
        A dotted extension such as ".mp4", or None when nothing matches.
    """
    sigs = list(signatures)  # survive a single-use iterator for both passes
    for sig in sigs:
        ext = canonical_video_extension(sig.extension)
        if ext:
            return ext
    for sig in sigs:
        if sig.mime:
            mapped = VIDEO_MIME_EXTENSION_MAP.get(sig.mime)
            if mapped:
                return mapped
    return None
2858
+
2859
+
2860
# Canonical extension per known video container name. Hoisted to module level
# so the mapping is built once at import time instead of on every call.
_VIDEO_CONTAINER_EXTENSION_MAP = {
    "mov": ".mov",
    "quicktime": ".mov",
    "mp4": ".mp4",
    "m4v": ".m4v",
    "matroska": ".mkv",
    "webm": ".webm",
    "avi": ".avi",
    "3gpp": ".3gp",
    "mpegts": ".ts",
    "flv": ".flv",
}


def guess_extension(container: str, kind: str) -> Optional[str]:
    """Guess a dotted file extension from a container/format name.

    Args:
        container: Container or format name, case-insensitive (e.g. "mov").
        kind: "image" uses IMAGE_EXTENSION_MAP; any other value is treated
            as video and uses the video container mapping.

    Returns:
        The dotted extension (e.g. ".mp4"), or None when unknown.
    """
    container = container.lower()
    if kind == "image":
        return IMAGE_EXTENSION_MAP.get(container)
    return _VIDEO_CONTAINER_EXTENSION_MAP.get(container)
2877
+
2878
+
2879
def should_ignore(entry: Path) -> bool:
    """Check if file/directory should be excluded from scanning.

    Excludes:
    - FOUND_MEDIA_FILES_* staging directories
    - .smm__runtime_logs_* log directories (timestamped, in CWD)
    - DEBUG_raw_applescript_output_* / DEBUG_photos_output_* dumps
    - Photos_rejections_* reports
    - smm_run_* and smm_skipped_files_* log files (legacy)
    - .DS_Store system files
    - Files with __SMM token (already processed by SMM)
    """
    name = entry.name
    ignored_prefixes = (
        "FOUND_MEDIA_FILES_",  # staging directories
        SMM_LOGS_SUBDIR,  # timestamped log directories
        "DEBUG_raw_applescript_output_",
        "DEBUG_photos_output_",
        "Photos_rejections_",
        "smm_run_",  # legacy per-run log files
        "smm_skipped_files_",
    )
    if name.startswith(ignored_prefixes):
        return True
    if name == ".DS_Store":  # macOS metadata
        return True
    if STAGING_TOKEN_PREFIX in name:  # already staged/processed by SMM
        LOG.debug("Skipping already-processed file: %s", name)
        return True
    return False
2912
+
2913
+
2914
def is_photos_managed_file(path: Path) -> bool:
    """Check if a file is managed by Apple Photos (has assetsd xattrs).

    Files imported into Apple Photos get extended attributes from the assetsd
    daemon. These files are locked by the Photos database and cannot be moved
    or modified without causing sync issues or permission errors.

    Prefers the ``xattr`` Python module; falls back to the ``xattr`` CLI when
    the module is unavailable. Any failure is treated as "not managed".

    Args:
        path: Path to check

    Returns:
        True if file has com.apple.assetsd.UUID xattr (managed by Photos)
    """
    marker = "com.apple.assetsd.UUID"
    try:
        import xattr  # type: ignore[import-not-found]
    except ImportError:
        # Module not available: shell out instead. Listing names only
        # (no -l flag) avoids decoding binary attribute values.
        try:
            proc = subprocess.run(
                ["xattr", str(path)],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except Exception:
            return False
        return marker in proc.stdout
    try:
        return marker in xattr.listxattr(str(path))
    except Exception:
        return False
2948
+
2949
+
2950
def extract_live_photo_content_id(path: Path) -> Optional[str]:
    """Extract the Live Photo content identifier from a HEIC/MOV via exiftool.

    Live Photos carry a ContentIdentifier tag linking the HEIC still and the
    MOV clip; it is used here to detect pairs.

    Args:
        path: Path to HEIC or MOV file

    Returns:
        Content identifier string if found, None otherwise (including when
        exiftool is missing, errors out, or times out).
    """
    exiftool = find_executable("exiftool")
    if not exiftool:
        LOG.debug("exiftool not available, skipping Live Photo content ID extraction")
        return None

    try:
        proc = subprocess.run(
            [exiftool, "-ContentIdentifier", "-b", str(path)],
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
    except Exception as exc:
        LOG.debug("Failed to extract Live Photo content ID from %s: %s", path.name, exc)
        return None

    if proc.returncode == 0:
        content_id = proc.stdout.strip()
        if content_id:
            LOG.debug("Extracted Live Photo content ID from %s: %s", path.name, content_id)
            return content_id
    return None
2983
+
2984
+
2985
def is_panoramic_photo(path: Path) -> bool:
    """Detect whether a photo is panoramic using EXIF metadata (exiftool).

    Probes ProjectionType, UsePanoramaViewer and PoseHeadingDegrees and looks
    for common panorama markers in the combined output.

    Args:
        path: Path to image file

    Returns:
        True if panoramic metadata detected, False otherwise (including when
        exiftool is unavailable or any error occurs).
    """
    exiftool = find_executable("exiftool")
    if not exiftool:
        LOG.debug("exiftool not available, skipping panoramic photo detection")
        return False

    pano_markers = ("equirectangular", "cylindrical", "spherical", "true", "360")
    try:
        proc = subprocess.run(
            [exiftool, "-ProjectionType", "-UsePanoramaViewer", "-PoseHeadingDegrees", "-b", str(path)],
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
        if proc.returncode == 0:
            output = proc.stdout.strip().lower()
            if output and any(marker in output for marker in pano_markers):
                LOG.debug("Detected panoramic photo: %s", path.name)
                return True
    except Exception as exc:
        LOG.debug("Failed to check panoramic metadata for %s: %s", path.name, exc)
    return False
3020
+
3021
+
3022
def detect_live_photo_pairs(media_files: list[MediaFile]) -> dict[str, tuple[MediaFile, MediaFile]]:
    """
    Detect Live Photo pairs (HEIC + MOV) by matching stems and content identifiers.

    Live Photos consist of:
    - A HEIC/JPG still image
    - A MOV video clip
    - Both files share the same stem (e.g., IMG_1234.HEIC + IMG_1234.MOV)
    - Both files have matching ContentIdentifier metadata

    Args:
        media_files: List of detected media files

    Returns:
        Dictionary mapping content_id -> (image_file, video_file) for each Live Photo pair

    Side effects:
        Stores is_live_photo / live_photo_pair / live_photo_content_id in the
        metadata of both files of every detected pair.
    """
    # Group files by stem - a Live Photo pair always shares its stem.
    files_by_stem: dict[str, list[MediaFile]] = {}
    for media in media_files:
        files_by_stem.setdefault(media.source.stem, []).append(media)

    live_photo_pairs: dict[str, tuple[MediaFile, MediaFile]] = {}

    # Check each stem group for Live Photo patterns
    for files in files_by_stem.values():
        if len(files) < 2:
            continue

        # Find HEIC/JPG and MOV candidates
        image_candidates = [f for f in files if f.kind == "image" and f.extension.lower() in {".heic", ".heif", ".jpg", ".jpeg"}]
        video_candidates = [f for f in files if f.kind == "video" and f.extension.lower() == ".mov"]

        if not image_candidates or not video_candidates:
            continue

        # Probe each video's content identifier once per group (exiftool is
        # slow) instead of once per image x video combination.
        video_ids = [(vid, extract_live_photo_content_id(vid.source)) for vid in video_candidates]

        # Try to match by content identifier
        for img in image_candidates:
            img_content_id = extract_live_photo_content_id(img.source)
            if not img_content_id:
                continue

            for vid, vid_content_id in video_ids:
                if vid_content_id and vid_content_id == img_content_id:
                    # Found a Live Photo pair!
                    LOG.debug("Detected Live Photo pair: %s + %s (content ID: %s)", img.source.name, vid.source.name, img_content_id)
                    live_photo_pairs[img_content_id] = (img, vid)

                    # Store pairing metadata in both files
                    img.metadata["is_live_photo"] = True
                    img.metadata["live_photo_pair"] = str(vid.source)
                    img.metadata["live_photo_content_id"] = img_content_id

                    vid.metadata["is_live_photo"] = True
                    vid.metadata["live_photo_pair"] = str(img.source)
                    vid.metadata["live_photo_content_id"] = vid_content_id
                    break

    return live_photo_pairs
3084
+
3085
+
3086
def gather_media_files(
    root: Path,
    recursive: bool,
    follow_symlinks: bool,
    skip_logger: SkipLogger,
    stats: RunStatistics,
    skip_compatibility_check: bool = False,
) -> list[MediaFile]:
    """Scan *root* for importable media files, updating *stats* and *skip_logger*.

    Walks the tree (recursively or one level), filters out ignored names,
    symlinks, Photos-managed files and skippable (text/empty/corrupt) files,
    then classifies each remaining binary file via detect_media(). After the
    scan, Live Photo pairs are detected across the collected results.

    Args:
        root: Directory to scan.
        recursive: Descend into subdirectories when True.
        follow_symlinks: Follow symlinks during the walk; unfollowed symlinks
            are logged as skipped.
        skip_logger: Sink for per-file skip reasons.
        stats: Mutable run counters, updated in place for every file seen.
        skip_compatibility_check: Passed through to detect_media().

    Returns:
        List of detected MediaFile objects (pair metadata filled in).
    """
    media_files: list[MediaFile] = []

    def iter_candidate_files() -> Iterable[Path]:
        # Yields candidate files; ignored directories are pruned in-place so
        # os.walk never descends into them.
        if recursive:
            for dirpath, dirnames, filenames in os.walk(root, followlinks=follow_symlinks):
                dirnames[:] = [d for d in dirnames if not should_ignore(Path(dirpath) / d)]
                for filename in filenames:
                    entry = Path(dirpath) / filename
                    if should_ignore(entry) or entry.is_dir():
                        continue
                    yield entry
        else:
            for entry in root.iterdir():
                if should_ignore(entry) or entry.is_dir():
                    continue
                yield entry

    scan_progress = ProgressReporter(0, "Scanning files")

    def handle_file(file_path: Path) -> None:
        # Classifies one file and updates exactly one stats bucket per outcome.
        stats.total_files_scanned += 1

        if file_path.is_symlink() and not follow_symlinks:
            skip_logger.log(file_path, "symlink (use --follow-symlinks to allow)")
            stats.skipped_other += 1
            return
        if not file_path.is_file():
            return

        # Skip files managed by Apple Photos - they're already imported and locked
        if is_photos_managed_file(file_path):
            LOG.debug("Skipping Photos-managed file: %s", file_path.name)
            stats.skipped_other += 1
            return

        skippable_reason = is_skippable_file(file_path)
        if skippable_reason:
            skip_logger.log(file_path, skippable_reason)
            # Bucket the skip by keyword in the human-readable reason string.
            if "text file" in skippable_reason.lower():
                stats.total_text_files += 1
            elif "empty" in skippable_reason.lower() or "corrupt" in skippable_reason.lower():
                stats.skipped_corrupt_or_empty += 1
            else:
                stats.skipped_other += 1
            return

        # File is binary
        stats.total_binary_files += 1

        media, reject_reason = detect_media(file_path, skip_compatibility_check)
        if media:
            stats.total_media_detected += 1
            if media.compatible and media.action == "import":
                stats.media_compatible += 1
            else:
                stats.media_incompatible += 1
                # Incompatible but has a non-skip action => convertible.
                if media.action and not media.action.startswith("skip"):
                    stats.incompatible_with_conversion_rule += 1

            # Check for panoramic photos
            if media.kind == "image" and media.extension.lower() in {".heic", ".heif", ".jpg", ".jpeg"}:
                if is_panoramic_photo(file_path):
                    media.metadata["is_panoramic"] = True
                    LOG.debug("Detected panoramic photo: %s", file_path.name)

            media_files.append(media)
            return
        if reject_reason:
            # Bucket the rejection; non-media rejections are counted but not
            # written to the skip log (they are expected and would be noise).
            reason_lower = reject_reason.lower()
            is_non_media = reason_lower.startswith("non-media:")
            if not is_non_media:
                is_non_media = any(keyword in reason_lower for keyword in NON_MEDIA_REASON_KEYWORDS)
            if "unknown" in reason_lower or "not recognised" in reason_lower:
                stats.skipped_unknown_format += 1
                log_reason = reject_reason
            elif "corrupt" in reason_lower or "empty" in reason_lower:
                stats.skipped_corrupt_or_empty += 1
                log_reason = reject_reason
            elif is_non_media:
                stats.skipped_non_media += 1
                # Strip the "non-media:" prefix from the logged reason.
                if reason_lower.startswith("non-media:") and ":" in reject_reason:
                    log_reason = reject_reason.split(":", 1)[1].strip()
                    if not log_reason:
                        log_reason = "non-media file"
                else:
                    log_reason = reject_reason
            else:
                stats.skipped_errors += 1
                log_reason = reject_reason
            if not is_non_media:
                skip_logger.log(file_path, log_reason)
            return

        # No media and no reason: if the extension or magic signature still
        # hints at media, treat the file as corrupt/unsupported media.
        suffix = normalize_extension(file_path.suffix)
        signatures = [safe_filetype_guess(file_path), safe_puremagic_guess(file_path)]
        if (suffix and (suffix in ALL_IMAGE_EXTENSIONS or suffix in VIDEO_EXTENSION_HINTS)) or any(is_image_signature(sig) or is_video_signature(sig) for sig in signatures):
            skip_logger.log(file_path, "corrupt or unsupported media")
            stats.skipped_corrupt_or_empty += 1

    for file_path in iter_candidate_files():
        handle_file(file_path)
        scan_progress.update()

    scan_progress.finish()

    # Detect Live Photo pairs after all files are scanned
    if media_files:
        live_photo_pairs = detect_live_photo_pairs(media_files)
        if live_photo_pairs:
            LOG.debug("Found %d Live Photo pair(s)", len(live_photo_pairs))

    return media_files
3206
+
3207
+
3208
def next_available_name(directory: Path, stem: str, extension: str) -> Path:
    """Return the first non-existing path ``directory/stem[_N]extension``.

    The bare name is tried first, then ``stem_1``, ``stem_2``, ... until a
    free slot is found.
    """
    candidate = directory / f"{stem}{extension}"
    attempt = 1
    while candidate.exists():
        candidate = directory / f"{stem}_{attempt}{extension}"
        attempt += 1
    return candidate
3216
+
3217
+
3218
def build_safe_stem(original_stem: str, run_token: str, sequence: int) -> str:
    """Build an ASCII-safe, length-bounded filename stem with a unique tail.

    The original stem is transliterated to ASCII, unsafe characters become
    underscores, and a run-token fragment plus a zero-padded sequence number
    is appended so stems are unique within a run.
    """
    # Transliterate to ASCII and collapse unsafe runs to single underscores.
    ascii_stem = unicodedata.normalize("NFKD", original_stem).encode("ascii", "ignore").decode("ascii")
    ascii_stem = re.sub(r"_+", "_", SAFE_NAME_PATTERN.sub("_", ascii_stem)).strip("._- ") or "media"

    # Unique tail: last 6 chars of the run token + 4-digit sequence number.
    run_fragment = (run_token[-6:] if len(run_token) >= 6 else run_token) or "run"
    unique_suffix = f"{run_fragment}{sequence:04d}"

    # Trim the base so base + "_" + tail fits the safe-stem budget.
    base_limit = max(10, MAX_SAFE_STEM_LENGTH - len(unique_suffix) - 1)
    if len(ascii_stem) > base_limit:
        ascii_stem = ascii_stem[:base_limit].rstrip("._- ") or "media"

    return f"{ascii_stem}_{unique_suffix}"[:MAX_SAFE_STEM_LENGTH]
3236
+
3237
+
3238
def stem_needs_sanitization(stem: str) -> bool:
    """Return True when *stem* is empty, contains unsafe characters, exceeds
    the safe-stem length, or carries leading/trailing whitespace."""
    if not stem:
        return True
    return bool(
        SAFE_NAME_PATTERN.search(stem)
        or len(stem) > MAX_SAFE_STEM_LENGTH
        or stem.strip() != stem
    )
3248
+
3249
+
3250
def move_to_staging(
    media_files: Iterable[MediaFile],
    staging: Path,
    originals_dir: Path,
    copy_files: bool = False,
) -> None:
    """Stage media files with unique sequential suffix for folder import.

    Every file gets a per-file __SMM token plus a suffix like "_(1)", "_(2)",
    etc. before its extension. This enables deterministic filename
    reconciliation after Photos import: staged names can be matched against
    Photos' returned filenames to determine which files were imported vs
    skipped.

    Args:
        media_files: Iterable of MediaFile objects to stage
        staging: Path to staging directory (FOUND_MEDIA_FILES_*)
        originals_dir: Path to originals archive directory (SEPARATE from
            staging, not a subdirectory)
        copy_files: When True, sources are copied instead of moved and
            originals are never archived separately.

    Note:
        Live Photo pairs maintain consistent stems (matched via their content
        identifier) but get different suffixes since they are separate files.
    """
    originals_dir.mkdir(parents=True, exist_ok=True)
    media_list = list(media_files)

    # Global sequence counter for ALL files (starts at 1)
    sequence_counter = 1
    run_token = uuid.uuid4().hex

    # Track Live Photo pairs to ensure consistent naming: content_id -> stem
    live_photo_stems: dict[str, str] = {}

    progress = ProgressReporter(len(media_list), "Staging media")
    for media in media_list:
        stem = media.source.stem.replace(" ", "_")  # Replace spaces to avoid Photos/import quirks

        if stem_needs_sanitization(stem):
            stem = build_safe_stem(stem, run_token, sequence_counter)

        token = uuid.uuid4().hex[:8]
        token_component = f"{STAGING_TOKEN_PREFIX}{token}__"

        # Precompute suffix now to enforce Apple Photos filename length limit; no spaces
        suffix = f"_({sequence_counter})"

        # Enforce both safe-stem limit and Apple Photos filename length (60 chars)
        max_base_len = max(
            5,
            min(
                MAX_SAFE_STEM_LENGTH - len(token_component),
                MAX_PHOTOS_FILENAME_LENGTH - len(token_component) - len(suffix) - len(media.extension),
            ),
        )
        if len(stem) > max_base_len:
            stem = stem[:max_base_len].rstrip("._- ") or "media"

        tokenized_stem = f"{stem}{token_component}"

        # Handle Live Photo pairs with consistent naming
        if media.metadata.get("is_live_photo"):
            content_id = media.metadata.get("live_photo_content_id")
            if content_id:
                if content_id in live_photo_stems:
                    # Use the same stem as the paired file
                    live_stem = live_photo_stems[content_id]
                    if stem != live_stem:
                        stem = live_stem
                        # Re-apply the length budget to the adopted stem.
                        max_base_len = max(
                            5,
                            min(
                                MAX_SAFE_STEM_LENGTH - len(token_component),
                                MAX_PHOTOS_FILENAME_LENGTH - len(token_component) - len(suffix) - len(media.extension),
                            ),
                        )
                        if len(stem) > max_base_len:
                            stem = stem[:max_base_len].rstrip("._- ") or "media"
                        tokenized_stem = f"{stem}{token_component}"
                    LOG.debug("Using paired stem %s for Live Photo %s", stem, media.source.name)
                else:
                    # First file of the pair - store the sanitized stem for the paired file
                    live_photo_stems[content_id] = stem
                    LOG.debug("Set stem %s for Live Photo pair (content ID: %s)", stem, content_id)

        unique_name = f"{tokenized_stem}{suffix}{media.extension}"
        destination = staging / unique_name

        # Handle collision (very unlikely with global counter, but safety net).
        # BUGFIX: the fallback name must keep the tokenized stem (which carries
        # the __SMM token) and update the recorded suffix; the previous code
        # built "{stem}_(seq-n)" which dropped the token, so the staged file
        # would not be recognized as processed and the staging_* metadata
        # below would not match the actual filename.
        collision_counter = 1
        while destination.exists():
            collision_counter += 1
            suffix = f"_({sequence_counter}-{collision_counter})"
            unique_name = f"{tokenized_stem}{suffix}{media.extension}"
            destination = staging / unique_name

        media.metadata.setdefault("original_source", str(media.source))
        LOG.debug("%s %s -> %s", "Copying" if copy_files else "Moving", media.source, destination)
        try:
            if copy_files:
                shutil.copy2(str(media.source), str(destination))
            else:
                shutil.move(str(media.source), str(destination))
        except PermissionError as exc:
            # File might be locked by Apple Photos or another process
            LOG.warning(
                "Permission denied for %s (may be locked by Photos): %s",
                media.source.name,
                exc,
            )
            media.stage_path = None
            media.metadata["staging_error"] = f"Permission denied: {exc}"
            progress.update()
            continue
        except OSError as exc:
            if exc.errno == 1:  # EPERM - Operation not permitted
                LOG.warning(
                    "Operation not permitted for %s (may be locked by Photos): %s",
                    media.source.name,
                    exc,
                )
                media.stage_path = None
                media.metadata["staging_error"] = f"Operation not permitted: {exc}"
                progress.update()
                continue
            raise
        media.stage_path = destination
        media.metadata["staging_stem"] = stem
        media.metadata["staging_suffix"] = suffix
        media.metadata["staging_name"] = destination.name
        media.metadata["staging_token"] = token
        media.metadata["staging_tokenized_stem"] = tokenized_stem
        media.metadata["copy_mode"] = copy_files
        sequence_counter += 1  # Increment for next file

        # Archive original if processing is required (before conversion)
        if media.requires_processing and not copy_files:
            # Use next_available_name for originals since they don't need reconciliation
            original_target = next_available_name(originals_dir, stem, media.original_suffix or media.extension)
            try:
                shutil.copy2(destination, original_target)
                media.metadata["original_archive"] = str(original_target)
            except Exception as exc:  # noqa: BLE001
                LOG.warning("Failed to archive original %s: %s", destination, exc)

        progress.update()
    progress.finish()
3404
+
3405
+
3406
def restore_media_file(media: MediaFile) -> None:
    """Undo staging for *media* (used when reverting after an error).

    In copy mode the source was never touched, so the staged copy is simply
    deleted. In move mode the staged file is moved back to the resolved
    restore location. No backup files are involved.
    """
    staged = media.stage_path
    if media.metadata.get("copy_mode"):
        # In copy mode the source is untouched; simply remove staged copy
        if staged and staged.exists():
            staged.unlink()
        media.stage_path = None
        return
    restore_path = resolve_restore_path(media.source)
    restore_path.parent.mkdir(parents=True, exist_ok=True)
    if staged and staged.exists():
        staged.rename(restore_path)
    media.stage_path = None
3423
+
3424
+
3425
def convert_image(media: MediaFile) -> None:
    """Convert a staged image to JPEG via ffmpeg, carrying metadata over.

    No backups: on success the source file is deleted and *media* is
    repointed at the JPEG; on failure the partial target is removed, the
    original remains, and the exception propagates.
    """
    assert media.stage_path is not None
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".jpg")

    command = [
        "ffmpeg", "-y",
        "-i", str(source),
        "-map_metadata", "0",  # carry container metadata into the JPEG
        "-c:v", "mjpeg",
        "-qscale:v", "2",  # high-quality JPEG
        str(target),
    ]

    try:
        run_checked(command)
        # Conversion succeeded - drop the original and adopt the JPEG
        source.unlink()
        media.stage_path = target
        media.extension = ".jpg"
        media.format_name = "jpeg"
        media.compatible = True
    except Exception:
        # Conversion failed - clean up partial target, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
3463
+
3464
+
3465
def convert_video(media: MediaFile) -> None:
    """Convert a staged video to H.264/AAC MP4 via ffmpeg.

    No backups: on success the source file is deleted and *media* is
    repointed at the MP4; on failure the partial target is removed, the
    original remains, and the exception propagates.
    """
    assert media.stage_path is not None
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".mp4")

    command = [
        "ffmpeg", "-y",
        "-i", str(source),
        "-map_metadata", "0",
        "-map", "0:v:0",
        "-c:v", "libx264",
        "-preset", "medium",
        "-crf", "18",
        "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2",  # x264 needs even dimensions
        "-pix_fmt", "yuv420p",  # broad player compatibility
        "-movflags", "+faststart",
    ]
    if media.audio_codec:
        command += ["-map", "0:a:0", "-c:a", "aac", "-b:a", "192k"]
    else:
        command.append("-an")  # source has no audio stream
    command.append(str(target))

    try:
        run_checked(command)
        # Conversion succeeded - drop the original and adopt the MP4
        source.unlink()
        media.stage_path = target
        media.extension = ".mp4"
        media.format_name = "mp4"
        media.video_codec = "h264"
        media.audio_codec = "aac" if media.audio_codec else None
        media.compatible = True
    except Exception:
        # Conversion failed - clean up partial target, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
3519
+
3520
+
3521
def convert_to_png(media: MediaFile) -> None:
    """Convert a staged image to PNG (lossless, widely supported) via ffmpeg.

    Fail-fast, no backups: on success the original is deleted, its metadata
    is copied onto the PNG, and *media* is repointed; on failure the partial
    target is cleaned up, the original remains, and the exception propagates.
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for PNG conversion")
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".png")

    # ffmpeg handles more input formats than ImageMagick for this path.
    command = [
        "ffmpeg", "-y",
        "-i", str(source),
        "-pix_fmt", "rgba",
        str(target),
    ]
    try:
        run_command_with_progress(command, "Converting to PNG")
        copy_metadata_from_source(source, target)
        source.unlink()  # Delete original after successful conversion
        media.stage_path = target
        media.extension = ".png"
        media.format_name = "png"
        media.requires_processing = False
        media.compatible = True
    except Exception:
        # Clean up partial target, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
3558
+
3559
+
3560
def convert_to_tiff(media: MediaFile) -> None:
    """Convert a staged image to TIFF (lossless, 16-bit depth) via ImageMagick.

    Fail-fast, no backups: on success the original is deleted, its metadata
    is copied onto the TIFF, and *media* is repointed; on failure the partial
    target is cleaned up, the original remains, and the exception propagates.
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for TIFF conversion")
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".tiff")

    # ImageMagick conversion at 16-bit depth, flattening layers.
    command = [
        resolve_imagemagick_command(),
        str(source),
        "-alpha", "on",
        "-depth", "16",
        "-flatten",
        str(target),
    ]
    try:
        run_command_with_progress(command, "Converting to TIFF")
        copy_metadata_from_source(source, target)
        source.unlink()  # Delete original after successful conversion
        media.stage_path = target
        media.extension = ".tiff"
        media.format_name = "tiff"
        media.requires_processing = False
        media.compatible = True
    except Exception:
        # Clean up partial target, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
3598
+
3599
+
3600
def convert_to_heic_lossless(media: MediaFile) -> None:
    """
    Convert media to lossless HEIC format using heif-enc or ffmpeg.

    Handles JPEG XL sources by first decoding to PNG via djxl, then encoding to HEIC.
    If djxl is unavailable for JXL input, falls back to TIFF conversion.

    Uses fail-fast approach: no backups, no fallbacks.
    On success: original file is deleted and media.stage_path updated.
    On failure: partial target and intermediate files are cleaned up, original remains, exception propagates.

    Raises:
        RuntimeError: If media.stage_path is None.
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for HEIC conversion")
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".heic")

    # Holds the temporary PNG produced when decoding a JXL source; cleaned
    # up unconditionally in the finally block below.
    intermediate: Optional[Path] = None
    try:
        if source.suffix.lower() == ".jxl":
            djxl = find_executable("djxl")
            if not djxl:
                # djxl not available - fall back to TIFF conversion instead
                # (convert_to_tiff updates media itself, so just return).
                LOG.warning("djxl not available; falling back to TIFF conversion")
                convert_to_tiff(media)
                return
            # Decode JXL to intermediate PNG for HEIC encoding
            fd, tmp_path = tempfile.mkstemp(suffix=".png", prefix="smm_jxl_")
            os.close(fd)  # Only the path is needed; the encoder writes the file.
            intermediate = Path(tmp_path)
            run_command_with_progress(
                [djxl, str(source), str(intermediate), "--lossless"],
                "Decoding JPEG XL",
            )
            source_for_heic = intermediate
        else:
            source_for_heic = source

        # Encode to HEIC using heif-enc or ffmpeg
        # heif-enc is preferred but only for input types it is known to read.
        heif_enc = find_executable("heif-enc")
        if heif_enc and source_for_heic.suffix.lower() in {
            ".png",
            ".tif",
            ".tiff",
            ".jpg",
            ".jpeg",
            ".bmp",
        }:
            cmd = [heif_enc, "--lossless", str(source_for_heic), str(target)]
            run_command_with_progress(cmd, "Encoding HEIC (lossless)")
        else:
            # NOTE(review): this fallback relies on ffmpeg inferring the HEIF
            # muxer from the .heic extension while encoding lossless x265 —
            # confirm the bundled ffmpeg build supports HEIF output.
            ffmpeg = ensure_ffmpeg_path()
            cmd = [
                ffmpeg,
                "-y",
                "-i",
                str(source_for_heic),
                "-c:v",
                "libx265",
                "-preset",
                "slow",
                "-x265-params",
                "lossless=1",
                "-pix_fmt",
                "yuv444p10le",
                str(target),
            ]
            run_command_with_progress(cmd, "Encoding HEIC via ffmpeg")

        # Conversion succeeded - copy metadata, delete original, update media
        copy_metadata_from_source(source, target)
        source.unlink()
        media.stage_path = target
        media.extension = ".heic"
        media.format_name = "heic"
        media.requires_processing = False
        media.compatible = True
    except Exception:
        # Clean up partial target and intermediate files, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
    finally:
        # Always clean up intermediate file if created
        if intermediate and intermediate.exists():
            with suppress(OSError):
                intermediate.unlink()
3687
+
3688
+
3689
def convert_animation_to_hevc_mp4(media: MediaFile) -> None:
    """Convert animated media (GIF, APNG, etc.) to HEVC-encoded MP4 for Photos compatibility.

    Uses lossless HEVC encoding with 10-bit YUV444 color space to preserve visual quality.
    Removes audio tracks as Photos does not support audio in animated images.
    Fail-fast: no backups, no fallbacks. On success the original stage file is
    deleted and ``media`` is updated; on failure the partially written target
    is removed (consistent with the other converters), the original remains,
    and the exception propagates.

    Args:
        media: MediaFile object with stage_path set to the file to convert

    Raises:
        RuntimeError: If stage_path is None
        CalledProcessError: If ffmpeg conversion fails
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for animation conversion")
    original_stage = media.stage_path  # Source file to convert
    target = next_available_name(original_stage.parent, original_stage.stem, ".mp4")
    ffmpeg = ensure_ffmpeg_path()
    cmd = [
        ffmpeg,
        "-y",  # Overwrite output file
        "-i",
        str(original_stage),
        "-vf",
        "scale=trunc(iw/2)*2:trunc(ih/2)*2",  # Ensure even dimensions for HEVC
        "-c:v",
        "libx265",  # HEVC video codec
        "-preset",
        "slow",  # Better compression at cost of encoding time
        "-x265-params",
        "lossless=1",  # Lossless encoding to preserve quality
        "-pix_fmt",
        "yuv444p10le",  # 10-bit color for animations
        "-an",  # Remove audio tracks
        str(target),
    ]
    try:
        run_command_with_progress(cmd, "Converting animation to HEVC")
        original_stage.unlink()  # Delete original only after successful conversion
    except Exception:
        # Fix: previously a failed conversion left a partial .mp4 behind in
        # the staging directory. Clean it up like the sibling converters do;
        # the original file is kept and the error still propagates.
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
    media.stage_path = target  # Update to new converted file
    media.extension = ".mp4"
    media.format_name = "mp4"
    media.video_codec = "hevc"
    media.audio_codec = None  # Audio removed
    media.kind = "video"
    media.requires_processing = False
    media.compatible = True
3737
+
3738
+
3739
def rewrap_to_mp4(media: MediaFile) -> None:
    """Rewrap media file to MP4 container without re-encoding.

    Converts the container format to MP4 while copying all streams and metadata
    without transcoding. Uses faststart flag for web-optimized playback.
    Fail-fast: no backups. On success the original stage file is deleted and
    ``media`` is updated; on failure the partially written target is removed
    (consistent with the other converters), the original remains, and the
    exception propagates.

    Args:
        media: MediaFile instance with valid stage_path

    Raises:
        RuntimeError: If stage_path is missing
        subprocess.CalledProcessError: If ffmpeg command fails
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for rewrap")
    original_stage = media.stage_path
    target = next_available_name(original_stage.parent, original_stage.stem, ".mp4")
    ffmpeg = ensure_ffmpeg_path()
    cmd = [
        ffmpeg,
        "-y",
        "-i",
        str(original_stage),
        "-c",
        "copy",  # Stream copy: no re-encode
        "-map",
        "0",  # Keep every stream from input 0
        "-map_metadata",
        "0",  # Preserve container metadata
        "-movflags",
        "+faststart",
        str(target),
    ]
    try:
        run_command_with_progress(cmd, "Rewrapping container")
        original_stage.unlink()  # Delete original after successful rewrap
    except Exception:
        # Fix: previously a failed rewrap left a partial .mp4 behind in the
        # staging directory. Clean it up like the sibling converters do; the
        # original file is kept and the error still propagates.
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
    media.stage_path = target
    media.extension = ".mp4"
    media.format_name = "mp4"
    media.requires_processing = False
    media.compatible = True
3780
+
3781
+
3782
def transcode_to_hevc_mp4(media: MediaFile, copy_audio: bool = False) -> None:
    """Transcode a staged video to lossless HEVC (H.265) in an MP4 container.

    Audio is either stream-copied (``copy_audio=True``) or re-encoded to
    AAC at 256k. Source metadata is mapped into the output.

    Fail-fast strategy: no backups and no fallbacks.
    On success the original stage file is removed and ``media`` is updated to
    the new MP4; on failure the partial output is deleted, the original is
    kept, and the exception propagates.

    Args:
        media: Media entry whose ``stage_path`` will be transcoded.
        copy_audio: Copy the source audio stream instead of encoding AAC.

    Raises:
        RuntimeError: If ``media.stage_path`` is not set.
        Exception: If the ffmpeg transcode fails (re-raised after cleanup).
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for transcode")
    src = media.stage_path
    dst = next_available_name(src.parent, src.stem, ".mp4")
    ffmpeg = ensure_ffmpeg_path()
    audio_args = ["-c:a", "copy"] if copy_audio else ["-c:a", "aac", "-b:a", "256k"]
    command = [
        ffmpeg,
        "-y",
        "-i", str(src),
        "-c:v", "libx265",
        "-preset", "slow",
        "-x265-params", "lossless=1",
        "-pix_fmt", "yuv420p10le",
        "-map_metadata", "0",
        *audio_args,
        str(dst),
    ]
    try:
        run_command_with_progress(command, "Transcoding to HEVC")
        src.unlink()  # Only remove the original once the transcode succeeded.
    except Exception:
        # Remove any partially written output; the original stays in place.
        with suppress(OSError):
            if dst.exists():
                dst.unlink()
        raise
    media.stage_path = dst
    media.extension = ".mp4"
    media.format_name = "mp4"
    media.video_codec = "hevc"
    media.audio_codec = media.audio_codec if copy_audio else "aac"
    media.requires_processing = False
    media.compatible = True
3844
+
3845
+
3846
def transcode_audio_to_supported(media: MediaFile) -> None:
    """Transcode audio to supported codec (AAC or EAC3) in MP4 container.

    Converts directly from source to target without creating backups.
    If conversion fails, the original file is preserved and the partial
    target is removed. Uses EAC3 (768k) for 5.1/7.1 surround sound, AAC
    (256k) for stereo/mono; the video stream is copied untouched.

    Raises:
        RuntimeError: If ``media.stage_path`` is not set.
        Exception: If the ffmpeg conversion fails (re-raised after cleanup).
    """
    # Fix: was a bare ``assert``, which is stripped under ``python -O``;
    # raise explicitly like every sibling converter does.
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for audio transcode")
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".mp4")
    ffmpeg = ensure_ffmpeg_path()
    # Choose the target codec from the probed channel count/layout.
    channels = int(media.metadata.get("audio_channels", 0) or 0)
    layout = str(media.metadata.get("audio_layout", "") or "").lower()
    if channels >= 6 or "7.1" in layout or "5.1" in layout:
        audio_codec = "eac3"
        audio_args = ["-c:a", "eac3", "-b:a", "768k"]
    else:
        audio_codec = "aac"
        audio_args = ["-c:a", "aac", "-b:a", "256k"]
    cmd = (
        [
            ffmpeg,
            "-y",
            "-i",
            str(source),
            "-c:v",
            "copy",  # Video untouched; only the audio track is re-encoded
        ]
        + audio_args
        + [
            "-map_metadata",
            "0",
            str(target),
        ]
    )
    try:
        run_command_with_progress(cmd, "Normalising audio codec")
        # Conversion succeeded - delete original, use converted file
        source.unlink()
        media.stage_path = target
        media.extension = ".mp4"
        media.format_name = "mp4"
        media.audio_codec = audio_codec
        media.requires_processing = False
        media.compatible = True
    except Exception:
        # Conversion failed - clean up partial target, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
3897
+
3898
+
3899
def rewrap_or_transcode_to_mp4(media: MediaFile) -> None:
    """Make a video an MP4: fast container rewrap first, transcode on failure.

    Strategy: first attempt a lossless rewrap (stream copy of all streams
    plus metadata, ``+faststart``). If that fails, fall back to a full
    HEVC/AAC transcode. Fail-fast with no backups: on success the original
    stage file is deleted and ``media`` updated; on failure any partial
    output is removed, the original is kept, and the exception propagates.

    Raises:
        RuntimeError: If ``media.stage_path`` is not set.
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for rewrap/transcode")
    src = media.stage_path
    ffmpeg = ensure_ffmpeg_path()

    def _finish(new_path: Path) -> None:
        # Shared bookkeeping once either strategy produced a valid MP4.
        src.unlink()
        media.stage_path = new_path
        media.extension = ".mp4"
        media.format_name = "mp4"
        media.requires_processing = False
        media.compatible = True

    def _discard(partial: Path) -> None:
        # Best-effort removal of a partially written output file.
        with suppress(OSError):
            if partial.exists():
                partial.unlink()

    # Attempt 1: lossless rewrap (copy every stream, keep metadata).
    dst = next_available_name(src.parent, src.stem, ".mp4")
    try:
        run_command_with_progress(
            [
                ffmpeg, "-y", "-i", str(src),
                "-c", "copy",
                "-map", "0",
                "-map_metadata", "0",
                "-movflags", "+faststart",
                str(dst),
            ],
            "Rewrapping to MP4",
        )
        _finish(dst)
        return
    except Exception:
        _discard(dst)

    # Attempt 2: full transcode to HEVC video / AAC audio.
    dst = next_available_name(src.parent, src.stem, ".mp4")
    try:
        run_command_with_progress(
            [
                ffmpeg, "-y", "-i", str(src),
                "-c:v", "libx265",
                "-preset", "medium",
                "-crf", "23",
                "-pix_fmt", "yuv420p",
                "-c:a", "aac",
                "-b:a", "192k",
                "-movflags", "+faststart",
                str(dst),
            ],
            "Transcoding to HEVC MP4",
        )
        _finish(dst)
    except Exception:
        _discard(dst)
        raise
3983
+
3984
+
3985
def skip_unknown_video(media: MediaFile, skip_logger: SkipLogger) -> bool:
    """Record an unsupported-video skip and restore the staged file.

    Logs the skip reason against the original source path, moves the staged
    file back via restore_media_file, and always returns False so callers
    treat the entry as not imported.
    """
    skip_logger.log(media.source, "unsupported video format")
    restore_media_file(media)
    return False
3989
+
3990
+
3991
def resolve_restore_path(path: Path) -> Path:
    """Return a collision-free destination for restoring *path*.

    The original location is reused when nothing occupies it; otherwise a
    non-conflicting name in the same directory is chosen.
    """
    if path.exists():
        return next_available_name(path.parent, path.stem, path.suffix)
    return path
3995
+
3996
+
3997
def revert_media_files(media_files: Iterable[MediaFile], staging: Optional[Path]) -> None:
    """Best-effort rollback: move staged files back to their origins.

    Copy-mode stage files are simply deleted (the original was never moved);
    moved files are renamed back to a collision-free restore path. Failures
    are logged per file rather than raised. Finally the staging directory,
    if any, is removed.
    """
    for item in media_files:
        source_path = item.source
        try:
            staged = item.stage_path
            if staged is None or not staged.exists():
                continue
            if item.metadata.get("copy_mode"):
                # Original never left its place; just drop the staged copy.
                staged.unlink(missing_ok=True)
                item.stage_path = None
                continue
            destination = resolve_restore_path(source_path)
            destination.parent.mkdir(parents=True, exist_ok=True)
            staged.rename(destination)
            item.stage_path = None
        except Exception as exc:  # noqa: BLE001
            LOG.warning("Failed to restore %s: %s", source_path, exc)
    if staging and staging.exists():
        shutil.rmtree(staging, ignore_errors=True)
4014
+
4015
+
4016
def ensure_compatibility(
    media_files: list[MediaFile],
    skip_logger: SkipLogger,
    stats: RunStatistics,
    skip_convert: bool = False,
) -> None:
    """Apply each staged file's planned action and keep only the successes.

    Dispatches on ``media.action`` to the matching converter, updating
    conversion counters in ``stats`` as it goes. Files whose conversion
    fails are logged, restored via restore_media_file, and dropped from
    ``media_files`` (which is rewritten in place at the end). With
    ``skip_convert`` every file is marked compatible and retained unchanged.
    """
    retained: list[MediaFile] = []  # Survivors; assigned back to media_files at the end
    progress = ProgressReporter(len(media_files), "Ensuring compatibility")

    def is_already_photos_compatible(media: MediaFile) -> bool:
        # Heuristic: extension check for images; container + codec check for videos.
        if media.kind == "image":
            return media.extension.lower() in COMPATIBLE_IMAGE_EXTENSIONS
        if media.kind == "video":
            container = (media.metadata.get("container") or media.format_name or "").lower()
            video_codec = (media.video_codec or "").lower()
            audio_codec = (media.audio_codec or "").lower()
            return container in COMPATIBLE_VIDEO_CONTAINERS and video_codec in COMPATIBLE_VIDEO_CODECS and (not audio_codec or audio_codec in COMPATIBLE_AUDIO_CODECS)
        return False

    for media in media_files:
        if media.stage_path is None or not media.stage_path.exists():
            skip_logger.log(media.source, "staged file missing before processing")
            progress.update()
            continue

        if media.action == "skip_vector":
            skip_logger.log(media.source, "vector artwork not supported")
            restore_media_file(media)
            progress.update()
            continue

        # NOTE(review): skip_unknown_video always returns False, so this
        # branch always updates progress and continues; the inner ``if``
        # never falls through. Confirm whether a True path was intended.
        if media.action == "skip_unknown_video":
            if not skip_unknown_video(media, skip_logger):
                progress.update()
                continue

        # Skip all conversions if flag is set (for format testing)
        if skip_convert:
            # Mark as compatible and treat as import-ready
            media.requires_processing = False
            media.compatible = True
            retained.append(media)
            progress.update()
            continue

        try:
            # Do not process files the detector already marked as compatible
            if media.detected_compatible and media.action != "import":
                LOG.debug("Bypassing processing for compatible media %s (action %s)", media.stage_path, media.action)
                media.requires_processing = False
                media.compatible = True
                media.action = "import"
                retained.append(media)
                progress.update()
                continue

            # Extra guard: heuristically skip conversion if container/codec are Photos-compatible
            if is_already_photos_compatible(media):
                media.requires_processing = False
                media.compatible = True
                media.action = "import"
                retained.append(media)
                progress.update()
                continue

            # Action dispatch: each converter updates media in place; counters
            # bracket the call so failures are attributed correctly.
            if media.action == "import":
                media.requires_processing = False
                media.compatible = True
            elif media.action == "convert_to_png":
                LOG.debug("Converting %s to PNG: %s", media.format_name, media.stage_path)
                stats.conversion_attempted += 1
                convert_to_png(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted to PNG: %s", media.stage_path)
            elif media.action == "convert_to_tiff":
                LOG.debug("Converting %s to TIFF: %s", media.format_name, media.stage_path)
                stats.conversion_attempted += 1
                convert_to_tiff(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted to TIFF: %s", media.stage_path)
            elif media.action == "convert_to_heic_lossless":
                LOG.debug("Converting %s to lossless HEIC: %s", media.format_name, media.stage_path)
                stats.conversion_attempted += 1
                convert_to_heic_lossless(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted to HEIC: %s", media.stage_path)
            elif media.action == "convert_animation_to_hevc_mp4":
                LOG.debug("Converting animated %s to HEVC MP4: %s", media.format_name, media.stage_path)
                stats.conversion_attempted += 1
                convert_animation_to_hevc_mp4(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted animation to HEVC MP4: %s", media.stage_path)
            elif media.action == "rewrap_to_mp4":
                LOG.debug("Rewrapping %s (%s/%s) to MP4 container: %s", media.format_name, media.video_codec or "unknown", media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                rewrap_to_mp4(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully rewrapped to MP4: %s", media.stage_path)
            elif media.action == "transcode_to_hevc_mp4":
                LOG.debug("Transcoding %s (%s/%s) to HEVC MP4: %s", media.format_name, media.video_codec or "unknown", media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                transcode_to_hevc_mp4(media, copy_audio=False)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully transcoded to HEVC MP4: %s", media.stage_path)
            elif media.action == "transcode_video_to_lossless_hevc":
                LOG.debug("Transcoding %s (%s/%s) to lossless HEVC MP4: %s", media.format_name, media.video_codec or "unknown", media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                transcode_to_hevc_mp4(media, copy_audio=True)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully transcoded to lossless HEVC MP4: %s", media.stage_path)
            elif media.action == "transcode_audio_to_aac_or_eac3":
                LOG.debug("Transcoding audio in %s (%s) to AAC/EAC-3: %s", media.format_name, media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                transcode_audio_to_supported(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully transcoded audio: %s", media.stage_path)
            elif media.action == "rewrap_or_transcode_to_mp4":
                LOG.debug("Rewrapping/transcoding %s (%s/%s) to MP4: %s", media.format_name, media.video_codec or "unknown", media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                rewrap_or_transcode_to_mp4(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted to MP4: %s", media.stage_path)
            else:
                # Default: keep and log unknown action
                skip_logger.log(media.source, f"unhandled action {media.action}, treating as import")
                media.requires_processing = False
                media.compatible = True
        except Exception as exc:  # noqa: BLE001
            # Any converter failure: count it, log it, restore the file, drop it.
            stats.conversion_failed += 1
            skip_logger.log(media.source, f"processing failed: {exc}")
            restore_media_file(media)
            progress.update()
            continue

        retained.append(media)
        progress.update()

    media_files[:] = retained  # In-place rewrite so the caller's list reflects survivors
    progress.finish()
4164
+
4165
+
4166
def update_stats_after_compatibility(stats: RunStatistics, media_files: list[MediaFile]) -> None:
    """Recompute aggregate run statistics after the compatibility pass.

    Derives detected/compatible/converted/staged counts from the surviving
    media entries and writes them onto ``stats``.
    """
    total = len(media_files)
    compatible = sum(1 for entry in media_files if entry.detected_compatible)
    converted = sum(1 for entry in media_files if not entry.detected_compatible and entry.was_converted)
    staged = sum(1 for entry in media_files if entry.stage_path and entry.stage_path.exists())

    stats.total_media_detected = total
    stats.media_compatible = compatible
    stats.media_incompatible = total - compatible
    stats.incompatible_with_conversion_rule = converted
    stats.staging_total = staged
    stats.staging_expected = compatible + converted
4174
+
4175
+
4176
def run_checked(cmd: list[str]) -> None:
    """Run *cmd* to completion, raising RuntimeError on a non-zero exit.

    Captures stdout/stderr; on failure the stderr is logged and a
    RuntimeError naming the executable and exit code is raised.
    """
    LOG.debug("Executing command: %s", " ".join(cmd))
    completed = subprocess.run(cmd, capture_output=True, text=True, check=False)
    if completed.returncode == 0:
        return
    LOG.error("Command failed: %s", completed.stderr.strip())
    raise RuntimeError(f"Command '{cmd[0]}' failed with exit code {completed.returncode}.")
4182
+
4183
+
4184
+ def import_folder_to_photos(
4185
+ staging_dir: Path,
4186
+ media_files: list[MediaFile],
4187
+ album_name: str,
4188
+ skip_duplicates: bool = True,
4189
+ ) -> tuple[int, int, list[MediaFile]]:
4190
+ """Import entire staging folder in a single Photos.app call.
4191
+
4192
+ Uses Photos' native folder import which handles queue management natively.
4193
+ Returns imported filenames and reconciles against staged files to determine
4194
+ which files were imported vs skipped.
4195
+
4196
+ This eliminates ALL timing dependencies and batch management complexity.
4197
+ Photos.app manages its own import queue, preventing resource exhaustion.
4198
+
4199
+ Args:
4200
+ staging_dir: Path to staging folder containing all media files
4201
+ media_files: List of MediaFile objects with stage_path set
4202
+ album_name: Photos album name to import into
4203
+ skip_duplicates: If True, skip duplicate checking (default: True)
4204
+
4205
+ Returns:
4206
+ Tuple of (imported_count, skipped_count, skipped_media_files)
4207
+
4208
+ Raises:
4209
+ RuntimeError: If Photos.app import fails with error
4210
+ """
4211
+ # DEBUG: Timestamp when function execution begins
4212
+ function_start_timestamp = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
4213
+ LOG.debug("✅ TIMESTAMP %s - import_folder_to_photos function EXECUTION STARTED", function_start_timestamp)
4214
+
4215
+ staged_media = [media for media in media_files if media.stage_path and media.stage_path.exists()]
4216
+ if not staged_media:
4217
+ return 0, 0, []
4218
+
4219
+ # Build AppleScript for folder import (based on import2photos.sh)
4220
+ # Uses 24-hour timeout to prevent AppleEvent timeout (-1712) when Photos shows dialogs
4221
+ applescript = """
4222
+ on run argv
4223
+ if (count of argv) < 3 then return "ERR\\t0\\tMissing arguments"
4224
+ set albumName to item 1 of argv
4225
+ set skipDup to ((item 2 of argv) is "true")
4226
+ set dirPath to item 3 of argv
4227
+
4228
+ script util
4229
+ on sanitizeText(srcText)
4230
+ set oldTIDs to AppleScript's text item delimiters
4231
+ set AppleScript's text item delimiters to {return, linefeed, tab}
4232
+ set parts to text items of srcText
4233
+ set AppleScript's text item delimiters to " "
4234
+ set out to parts as text
4235
+ set AppleScript's text item delimiters to oldTIDs
4236
+ return out
4237
+ end sanitizeText
4238
+ end script
4239
+
4240
+ try
4241
+ set folderAlias to POSIX file (dirPath as text)
4242
+ on error errMsg number errNum
4243
+ return "ERR\\t" & (errNum as text) & "\\t" & errMsg
4244
+ end try
4245
+
4246
+ set outLines to {}
4247
+ tell application id "com.apple.Photos"
4248
+ activate
4249
+ -- Use very long timeout (24 hours) to allow user interaction with Photos dialogs
4250
+ with timeout of 86400 seconds
4251
+ try
4252
+ if (count of (albums whose name is albumName)) = 0 then
4253
+ make new album named albumName
4254
+ end if
4255
+ set tgtAlbum to first album whose name is albumName
4256
+
4257
+ -- SINGLE folder import call - Photos manages the queue natively
4258
+ set importedItems to import folderAlias skip check duplicates skipDup
4259
+
4260
+ if (count of importedItems) > 0 then
4261
+ add importedItems to tgtAlbum
4262
+ end if
4263
+
4264
+ -- Return filenames of imported items for reconciliation
4265
+ repeat with mi in importedItems
4266
+ try
4267
+ set fn to filename of mi
4268
+ set fn2 to util's sanitizeText(fn)
4269
+ set end of outLines to "FN\\t" & fn2
4270
+ end try
4271
+ end repeat
4272
+ on error errMsg number errNum
4273
+ return "ERR\\t" & (errNum as text) & "\\t" & errMsg
4274
+ end try
4275
+ end timeout
4276
+ end tell
4277
+
4278
+ set oldTIDs to AppleScript's text item delimiters
4279
+ set AppleScript's text item delimiters to linefeed
4280
+ set outText to outLines as text
4281
+ set AppleScript's text item delimiters to oldTIDs
4282
+ return outText
4283
+ end run
4284
+ """
4285
+
4286
+ # Execute AppleScript with folder import - with retry logic for Photos dialogs
4287
+ LOG.info("Importing staging folder into Photos album '%s'...", album_name)
4288
+
4289
+ def run_import_applescript() -> subprocess.CompletedProcess[str]:
4290
+ """Execute the import AppleScript. No timeout - AppleScript has its own 24h timeout."""
4291
+ return subprocess.run(
4292
+ ["osascript", "-", album_name, str(skip_duplicates).lower(), str(staging_dir)],
4293
+ input=applescript,
4294
+ capture_output=True,
4295
+ text=True,
4296
+ check=False,
4297
+ )
4298
+
4299
+ # Retry loop for AppleEvent timeout (-1712) when Photos shows dialogs
4300
+ max_retries = 10
4301
+ for attempt in range(max_retries):
4302
+ # DEBUG: Timestamp when AppleScript execution begins
4303
+ applescript_start_timestamp = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
4304
+ LOG.debug("📸 TIMESTAMP %s - About to execute AppleScript (osascript) to import folder to Photos.app (attempt %d/%d)", applescript_start_timestamp, attempt + 1, max_retries)
4305
+
4306
+ result = run_import_applescript()
4307
+
4308
+ # DEBUG: Timestamp when AppleScript execution completes
4309
+ applescript_end_timestamp = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
4310
+ LOG.debug("📸 TIMESTAMP %s - AppleScript execution COMPLETED", applescript_end_timestamp)
4311
+
4312
+ output = result.stdout.strip()
4313
+
4314
+ if LOG.isEnabledFor(logging.DEBUG):
4315
+ debug_parent = _log_directory() or staging_dir.parent
4316
+ timestamp_segment = dt.datetime.now().strftime("%Y%m%d%H%M%S")
4317
+ debug_parent.mkdir(parents=True, exist_ok=True)
4318
+ raw_output_file = debug_parent / f"DEBUG_raw_applescript_output_{timestamp_segment}.txt"
4319
+ with raw_output_file.open("wb") as binary_handle:
4320
+ binary_handle.write(result.stdout.encode("utf-8"))
4321
+ LOG.debug("DEBUG: Raw AppleScript output saved to %s (%d bytes)", raw_output_file, len(result.stdout))
4322
+
4323
+ # Check for AppleEvent timeout error (-1712) - Photos was showing a dialog
4324
+ if output.startswith("ERR\t"):
4325
+ parts = output.split("\t")
4326
+ err_code = parts[1] if len(parts) > 1 else "0"
4327
+ err_msg = parts[2] if len(parts) > 2 else "Unknown error"
4328
+
4329
+ # Error -1712 is "AppleEvent timed out" - Photos was waiting for user interaction
4330
+ if err_code == "-1712":
4331
+ print("\n⚠️ Apple Photos is waiting for user interaction (dialog open)")
4332
+ print(" Please close any Photos dialogs and press Enter to retry...")
4333
+ print(" Or type 'abort' to cancel the import.")
4334
+ try:
4335
+ user_input = input(f" [Attempt {attempt + 1}/{max_retries}] Press Enter to retry or 'abort' to cancel: ").strip().lower()
4336
+ if user_input == "abort":
4337
+ raise RuntimeError(f"Photos import aborted by user after AppleEvent timeout [{err_code}]: {err_msg}")
4338
+ LOG.info("Retrying Photos import after user closed dialog...")
4339
+ continue # Retry the import
4340
+ except (KeyboardInterrupt, EOFError):
4341
+ raise RuntimeError(f"Photos import cancelled by user [{err_code}]: {err_msg}")
4342
+
4343
+ # Other errors are fatal
4344
+ raise RuntimeError(f"Photos import failed [{err_code}]: {err_msg}")
4345
+
4346
+ # Success - no error, break out of retry loop
4347
+ break
4348
+ else:
4349
+ # Exhausted all retries
4350
+ raise RuntimeError(f"Photos import failed after {max_retries} attempts due to repeated AppleEvent timeouts")
4351
+
4352
+ # Parse imported filenames from AppleScript output
4353
+ # Format: "FN\t<filename>" per line
4354
+ imported_names = []
4355
+ line_count = 0
4356
+ for line in output.split("\n"):
4357
+ line_count += 1
4358
+ line = line.strip()
4359
+ if line.startswith("FN\t"):
4360
+ filename = line[3:] # Remove "FN\t" prefix
4361
+ imported_names.append(filename)
4362
+
4363
+ LOG.debug(f"DEBUG: Parsed {len(imported_names)} filenames from {line_count} total lines")
4364
+
4365
+ LOG.debug("Photos returned %d imported filenames", len(imported_names))
4366
+
4367
+ if LOG.isEnabledFor(logging.DEBUG):
4368
+ debug_parent = _log_directory() or staging_dir.parent
4369
+ timestamp_segment = dt.datetime.now().strftime("%Y%m%d%H%M%S")
4370
+ debug_parent.mkdir(parents=True, exist_ok=True)
4371
+ photos_output_file = debug_parent / f"DEBUG_photos_output_{timestamp_segment}.txt"
4372
+ with photos_output_file.open("w", encoding="utf-8") as text_handle:
4373
+ text_handle.write("FILENAMES RETURNED BY PHOTOS.APP:\n")
4374
+ text_handle.write("=" * 80 + "\n")
4375
+ for name in sorted(imported_names):
4376
+ text_handle.write(f"{name}\n")
4377
+ LOG.debug("DEBUG: Photos output saved to %s", photos_output_file)
4378
+
4379
+ # DEBUG: Log first 5 filenames returned by Photos
4380
+ LOG.debug("DEBUG: First 5 filenames returned by Photos:")
4381
+ for i, name in enumerate(imported_names[:5]):
4382
+ LOG.debug(f" [{i}] {repr(name)}")
4383
+
4384
+ photos_imported_count = len(imported_names)
4385
+ staged_count = len(staged_media)
4386
+
4387
+ LOG.debug("Reconciliation: Photos returned %d items, staged %d files", photos_imported_count, staged_count)
4388
+
4389
+ token_to_media: dict[str, MediaFile] = {}
4390
+ for media in staged_media:
4391
+ token_value = media.metadata.get("staging_token")
4392
+ if token_value:
4393
+ token_to_media[token_value] = media
4394
+
4395
+ imported_media: list[MediaFile] = []
4396
+ skipped_media: list[MediaFile] = []
4397
+ matched_media_ids: set[int] = set()
4398
+ unmatched_names: list[str] = []
4399
+
4400
+ for name in imported_names:
4401
+ tokens = [match.group(1) for match in STAGING_TOKEN_PATTERN.finditer(name)]
4402
+ assigned = False
4403
+ for token_value in tokens:
4404
+ matched_media = token_to_media.get(token_value)
4405
+ if matched_media and id(matched_media) not in matched_media_ids:
4406
+ token_to_media.pop(token_value, None)
4407
+ matched_media_ids.add(id(matched_media))
4408
+ matched_media.metadata["photos_returned_name"] = name
4409
+ imported_media.append(matched_media)
4410
+ assigned = True
4411
+ break
4412
+ if not assigned:
4413
+ unmatched_names.append(name)
4414
+
4415
+ remaining_media = [media for media in staged_media if id(media) not in matched_media_ids]
4416
+ imported_counter: Counter[str] = Counter(unmatched_names)
4417
+
4418
+ def consume_exact(name: str) -> Optional[str]:
4419
+ if imported_counter.get(name, 0) > 0:
4420
+ imported_counter[name] -= 1
4421
+ if imported_counter[name] == 0:
4422
+ del imported_counter[name]
4423
+ return name
4424
+ return None
4425
+
4426
+ def consume_casefold(name: str) -> Optional[str]:
4427
+ lowered = name.casefold()
4428
+ for candidate in list(imported_counter.keys()):
4429
+ if imported_counter[candidate] > 0 and candidate.casefold() == lowered:
4430
+ imported_counter[candidate] -= 1
4431
+ if imported_counter[candidate] == 0:
4432
+ del imported_counter[candidate]
4433
+ return candidate
4434
+ return None
4435
+
4436
+ def consume_name(name: str) -> Optional[str]:
4437
+ return consume_exact(name) or consume_casefold(name)
4438
+
4439
+ name_suffix_pattern = re.compile(r"^(.*)[ _]?\([0-9-]+\)(\.[^.]+)$")
4440
+
4441
+ def strip_staging_suffix(name: str) -> Optional[str]:
4442
+ match = name_suffix_pattern.match(name)
4443
+ if match:
4444
+ return f"{match.group(1)}{match.group(2)}"
4445
+ return None
4446
+
4447
+ for media in remaining_media:
4448
+ stage_path = media.stage_path
4449
+ if stage_path is None:
4450
+ skipped_media.append(media)
4451
+ continue
4452
+ stage_name = stage_path.name
4453
+ candidates = [stage_name]
4454
+ staging_stem = media.metadata.get("staging_stem")
4455
+ if staging_stem:
4456
+ base_candidate = f"{staging_stem}{media.extension}" if media.extension else staging_stem
4457
+ if base_candidate not in candidates:
4458
+ candidates.append(base_candidate)
4459
+ tokenized_stem = media.metadata.get("staging_tokenized_stem")
4460
+ if tokenized_stem:
4461
+ token_base = f"{tokenized_stem}{media.extension}" if media.extension else tokenized_stem
4462
+ if token_base not in candidates:
4463
+ candidates.append(token_base)
4464
+ if tokenized_stem.endswith("__"):
4465
+ single_variant = tokenized_stem[:-1]
4466
+ token_base_single = f"{single_variant}{media.extension}" if media.extension else single_variant
4467
+ if token_base_single not in candidates:
4468
+ candidates.append(token_base_single)
4469
+ single_stage = stage_name.replace(tokenized_stem, single_variant)
4470
+ if single_stage not in candidates:
4471
+ candidates.append(single_stage)
4472
+ trimmed_candidate = strip_staging_suffix(stage_name)
4473
+ if trimmed_candidate and trimmed_candidate not in candidates:
4474
+ candidates.append(trimmed_candidate)
4475
+
4476
+ matched_name = None
4477
+ for candidate in candidates:
4478
+ matched_name = consume_name(candidate)
4479
+ if matched_name:
4480
+ break
4481
+
4482
+ if matched_name:
4483
+ media.metadata["photos_returned_name"] = matched_name
4484
+ imported_media.append(media)
4485
+ matched_media_ids.add(id(media))
4486
+ else:
4487
+ skipped_media.append(media)
4488
+
4489
+ leftover_imported = list(imported_counter.elements())
4490
+
4491
+ if leftover_imported:
4492
+ LOG.warning(
4493
+ "Photos returned %d filename(s) that did not match staged files; first entries: %s",
4494
+ len(leftover_imported),
4495
+ leftover_imported[:5],
4496
+ )
4497
+
4498
+ if skipped_media:
4499
+ LOG.warning("Photos did not report %d staged file(s); treating them as skipped.", len(skipped_media))
4500
+ rejection_parent = _log_directory() or staging_dir.parent
4501
+ rejection_parent.mkdir(parents=True, exist_ok=True)
4502
+ rejection_path = rejection_parent / f"Photos_rejections_{dt.datetime.now().strftime('%Y%m%d%H%M%S')}.txt"
4503
+ with rejection_path.open("w", encoding="utf-8") as rejection_handle:
4504
+ rejection_handle.write("FILES REJECTED OR MISSING FROM PHOTOS IMPORT\n")
4505
+ rejection_handle.write("=" * 80 + "\n")
4506
+ for media in skipped_media:
4507
+ stage_name = media.stage_path.name if media.stage_path else "<missing>"
4508
+ original_source = media.metadata.get("original_source") or str(media.source)
4509
+ rejection_handle.write(f"Staged: {stage_name}\tOriginal: {original_source}\n")
4510
+ if leftover_imported:
4511
+ rejection_handle.write("\nFILENAMES RETURNED BY PHOTOS WITH NO MATCH\n")
4512
+ rejection_handle.write("=" * 80 + "\n")
4513
+ for name in leftover_imported:
4514
+ rejection_handle.write(f"{name}\n")
4515
+ LOG.info("Photos rejection details written to %s", rejection_path)
4516
+ else:
4517
+ LOG.info("All %d staged file(s) reported by Photos.", len(imported_media))
4518
+
4519
+ imported_count = len(imported_media)
4520
+ skipped_count = len(skipped_media)
4521
+
4522
+ LOG.info(
4523
+ "Folder import complete: %d imported, %d skipped (duplicates or rejected by Photos)",
4524
+ imported_count,
4525
+ skipped_count,
4526
+ )
4527
+
4528
+ return imported_count, skipped_count, skipped_media
4529
+
4530
+
4531
def prompt_retry_failed_imports() -> bool:
    """Ask the user whether failed Apple Photos imports should be retried.

    Returns:
        True when the user answers yes; False on a no answer or when the
        prompt is interrupted (Ctrl+C / EOF).
    """
    prompt = "\nWould you like to retry importing the failed files? (y/n): "
    while True:
        try:
            answer = input(prompt).strip().lower()
        except (KeyboardInterrupt, EOFError):
            print("\nNo retry.")
            return False
        if answer in ("y", "yes"):
            return True
        if answer in ("n", "no"):
            return False
        print("Please enter 'y' or 'n'.")
4545
+
4546
+
4547
def confirm_scan(root: Path, output_dir: Path, assume_yes: bool) -> bool:
    """Ask user confirmation before scanning and staging.

    Args:
        root: directory or file being scanned
        output_dir: directory where staging/logs will be written
        assume_yes: skip prompt when True

    Returns:
        True to proceed with the scan, False when the user aborts.
    """
    if assume_yes:
        return True

    print("\nAbout to scan and import media with Smart Media Manager")
    print(f" Scan root: {root}")
    print(f" Logs/staging will be created under: {output_dir}")
    print("Press Enter to continue or 'n' to abort.")

    try:
        answer = input("Proceed? [Y/n]: ").strip().lower()
    except (KeyboardInterrupt, EOFError):
        print("\nAborted by user.")
        return False

    # An empty response defaults to "yes".
    if answer not in ("", "y", "yes"):
        print("Aborted by user.")
        return False
    return True
4574
+
4575
+
4576
def cleanup_staging(staging: Path) -> None:
    """Delete the staging folder and everything inside it, if present."""
    if not staging.exists():
        return
    LOG.debug("Deleting staging folder %s", staging)
    shutil.rmtree(staging)
4580
+
4581
+
4582
def configure_logging() -> None:
    """Reset the module logger: INFO level with a single WARNING-only console handler."""
    LOG.setLevel(logging.INFO)
    LOG.handlers.clear()
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.WARNING)
    stream_handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
    LOG.addHandler(stream_handler)
4589
+
4590
+
4591
def attach_file_logger(root: Path, run_ts: str) -> Path:
    """Attach a file logger writing into a fresh timestamped directory in CWD.

    Args:
        root: Scan root directory (unused for log placement; kept for
            backward compatibility with existing callers)
        run_ts: Timestamp string identifying this run

    Returns:
        Path to the log file (the already-open one if a handler exists).

    Note:
        The directory is created in the current working directory with the
        pattern .smm__runtime_logs_<run_ts>_<uuid8>, keeping log output out
        of the media scan itself.
    """
    global _FILE_LOG_HANDLER
    if _FILE_LOG_HANDLER is not None:
        # A handler was already attached during this process; reuse its file.
        return Path(_FILE_LOG_HANDLER.baseFilename)  # type: ignore[attr-defined]

    # Short UUID suffix keeps two runs started in the same second distinct.
    unique_suffix = str(uuid.uuid4())[:8]
    log_dir = Path.cwd() / f"{SMM_LOGS_SUBDIR}{run_ts}_{unique_suffix}"
    log_dir.mkdir(parents=True, exist_ok=True)

    # The log file lives inside the timestamped directory.
    log_file = log_dir / f"smm_run_{run_ts}.log"
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    LOG.addHandler(file_handler)
    _FILE_LOG_HANDLER = file_handler
    return log_file
4625
+
4626
+
4627
def validate_root(path: Path, allow_file: bool = False) -> Path:
    """Expand and resolve *path*, ensuring it is a usable scan root.

    Args:
        path: candidate scan root
        allow_file: also accept a regular file (single-file mode)

    Returns:
        The fully resolved path.

    Raises:
        RuntimeError: if the path does not exist or has the wrong type.
    """
    resolved = path.expanduser().resolve()
    if not resolved.exists():
        raise RuntimeError(f"Path does not exist: {resolved}")
    acceptable = resolved.is_dir() or (allow_file and resolved.is_file())
    if not acceptable:
        raise RuntimeError(f"Path must be a {'file or ' if allow_file else ''}directory: {resolved}")
    return resolved
4634
+
4635
+
4636
def main() -> int:
    """Entry point: scan for media, stage/convert it, and import into Apple Photos.

    Returns:
        Process exit code: 0 on success or user abort, 1 on error,
        130 when interrupted with Ctrl+C.
    """
    configure_logging()
    args = parse_args()
    LOG.info("smart-media-manager %s", __version__)
    # Bootstrap (Homebrew/system deps) can be skipped via flag or env var.
    skip_bootstrap = args.skip_bootstrap or bool(os.environ.get("SMART_MEDIA_MANAGER_SKIP_BOOTSTRAP"))
    if skip_bootstrap:
        LOG.debug("Skipping dependency bootstrap (manual mode).")
    else:
        ensure_system_dependencies()
    # State shared with the exception handlers below; initialized up front so
    # cleanup code can safely test them even if a failure happens early.
    media_files: list[MediaFile] = []
    staging_root: Optional[Path] = None
    skip_log: Optional[Path] = None
    skip_logger: Optional[SkipLogger] = None
    stats = RunStatistics()
    try:
        # Auto-detect if path is a file or directory
        is_single_file = args.path.is_file()

        # Warn if --recursive is used with a single file (it will be ignored)
        if is_single_file and args.recursive:
            LOG.warning("--recursive flag ignored when processing a single file")
            print("Warning: --recursive flag ignored when processing a single file")

        root = validate_root(args.path, allow_file=is_single_file)
        run_ts = timestamp()

        # For single file mode, use parent directory for outputs; otherwise use scan root
        output_dir = root.parent if is_single_file else root

        if not confirm_scan(root, output_dir, args.assume_yes):
            return 0

        # Check write permissions for both CWD (logs) and output_dir (skip logs, staging)
        try:
            check_write_permission(Path.cwd(), "create logs")
        except (PermissionError, OSError) as e:
            print(f"ERROR: {e}", file=sys.stderr)
            return 1

        try:
            check_write_permission(output_dir, "create skip logs and staging directory")
        except (PermissionError, OSError) as e:
            print(f"ERROR: {e}", file=sys.stderr)
            return 1

        log_path = attach_file_logger(root, run_ts)  # root arg kept for compatibility, not used for log location
        configure_pillow_max_image_pixels(args.max_image_pixels)

        # External tools required for probing, converting, and AppleScript import.
        for dependency in ("ffprobe", "ffmpeg", "osascript"):
            ensure_dependency(dependency)
        LOG.info("Scanning %s for media files...", root)
        print(f"Scanning {root}...")

        # Skip log goes in output directory (scan root or parent of single file)
        skip_log = output_dir / f"smm_skipped_files_{run_ts}.log"
        if skip_log.exists():
            skip_log.unlink()
        skip_logger = SkipLogger(skip_log)

        # Handle single file mode
        if is_single_file:
            media, reject_reason = detect_media(root, args.skip_compatibility_check)
            if media:
                media_files = [media]
                stats.total_files_scanned = 1
                stats.total_binary_files = 1
                stats.total_media_detected = 1
                if media.compatible:
                    stats.media_compatible = 1
                else:
                    stats.media_incompatible = 1
            elif reject_reason:
                skip_logger.log(root, reject_reason)
                LOG.debug("File rejected: %s", reject_reason)
                return 0
            else:
                LOG.debug("File is not a supported media format.")
                return 0
        else:
            media_files = gather_media_files(
                root,
                args.recursive,
                args.follow_symlinks,
                skip_logger,
                stats,
                args.skip_compatibility_check,
            )
        if not media_files:
            LOG.warning("No media files detected.")
            # Drop an empty skip log so a clean run leaves no stray files.
            if skip_logger and not skip_logger.has_entries() and skip_log.exists():
                skip_log.unlink()
            return 0
        ensure_raw_dependencies_for_files(media_files)

        # Create staging directory in output directory (scan root or parent of single file)
        staging_root = output_dir / f"FOUND_MEDIA_FILES_{run_ts}"
        staging_root.mkdir(parents=True, exist_ok=False)

        # Create originals directory OUTSIDE staging folder (sibling directory)
        # CRITICAL: Must NOT be inside staging_root or Photos will try to import incompatible original files!
        originals_root = output_dir / f"ORIGINALS_{run_ts}"

        move_to_staging(media_files, staging_root, originals_root, copy_files=args.copy_mode)
        ensure_compatibility(media_files, skip_logger, stats, args.skip_convert)
        # No sanitization needed - sequential suffix already ensures uniqueness
        update_stats_after_compatibility(stats, media_files)

        # Sanity check: every media file must have a live staged copy on disk.
        missing_media: list[MediaFile] = [media for media in media_files if not media.stage_path or not media.stage_path.exists()]

        if missing_media:
            missing_listing = ", ".join(str((m.stage_path or m.source)) for m in missing_media[:5])
            raise RuntimeError(f"Missing staged file(s): {missing_listing}")

        staged_count = len(media_files)
        LOG.info("Preparing to import %d staged file(s) into Apple Photos", staged_count)
        print(f"\nStaging completed: {staged_count} file(s) ready for Photos import.")

        # NOTE(review): update_stats_after_compatibility was already called a few
        # lines above, and the summary is printed again after the Photos import
        # below - confirm the repetition is intentional.
        update_stats_after_compatibility(stats, media_files)
        stats.log_summary()
        stats.print_summary()

        LOG.info("Importing %d file(s) into Apple Photos via folder import...", staged_count)
        print(f"Importing {staged_count} file(s) into Apple Photos...")

        # DEBUG: Timestamp when folder import is about to be called
        current_timestamp = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        LOG.debug("🚨 TIMESTAMP %s - The function 'import_folder_to_photos' was called now. No imports should have been attempted before this time!", current_timestamp)
        print(f"🚨 TIMESTAMP {current_timestamp} - Calling import_folder_to_photos NOW")

        # Single folder import replaces batch import - no timing dependencies
        # By default, Photos will check for duplicates and prompt the user
        imported_count, skipped_count, skipped_media = import_folder_to_photos(
            staging_dir=staging_root,
            media_files=media_files,
            album_name=args.album,
            skip_duplicates=args.skip_duplicate_check,
        )

        # Log skipped files (duplicates or rejected by Photos) and populate stats
        if skipped_media:
            for media in skipped_media:
                log_target = media.stage_path or media.metadata.get("original_source") or media.source
                skip_logger.log(Path(log_target), "Skipped by Photos (duplicate or incompatible format)")
                # Issue #3: Populate refused_filenames for enhanced error reporting
                stats.refused_filenames.append((Path(log_target), "Skipped by Photos (duplicate or incompatible format)"))
            LOG.warning("%d file(s) skipped by Photos (see skip log)", skipped_count)
            # Issue #3: Track refused count for statistics
            stats.refused_by_apple_photos = skipped_count

        # Update statistics
        stats.total_imported = imported_count
        for media in media_files:
            if media not in skipped_media:
                if media.was_converted:
                    stats.imported_after_conversion += 1
                else:
                    stats.imported_without_conversion += 1

        # Print statistics summary
        stats.print_summary()
        stats.log_summary()

        # Issue #2: Prompt user to retry failed imports
        if skipped_media and prompt_retry_failed_imports():
            LOG.info("Retrying import for %d failed file(s)...", len(skipped_media))
            print(f"\nRetrying import for {len(skipped_media)} file(s)...")

            # Create temporary retry staging folder with only skipped files
            retry_staging = staging_root.parent / f"RETRY_STAGING_{timestamp()}"
            retry_staging.mkdir(parents=True, exist_ok=True)

            # Move skipped files to retry staging
            retry_media: list[MediaFile] = []
            for media in skipped_media:
                if media.stage_path and media.stage_path.exists():
                    retry_dest = retry_staging / media.stage_path.name
                    shutil.move(str(media.stage_path), str(retry_dest))
                    media.stage_path = retry_dest
                    retry_media.append(media)

            if retry_media:
                # Retry import with only the failed files
                retry_imported, retry_skipped, retry_skipped_media = import_folder_to_photos(
                    staging_dir=retry_staging,
                    media_files=retry_media,
                    album_name=args.album,
                    skip_duplicates=args.skip_duplicate_check,
                )

                # Update statistics with retry results
                stats.total_imported += retry_imported
                stats.refused_by_apple_photos = len(retry_skipped_media)

                # Update refused_filenames with final failures
                stats.refused_filenames.clear()
                for media in retry_skipped_media:
                    log_target = media.stage_path or media.metadata.get("original_source") or media.source
                    stats.refused_filenames.append((Path(log_target), "Failed after retry"))
                    skip_logger.log(Path(log_target), "Failed after retry")

                # Clean up retry staging folder
                if retry_staging.exists():
                    shutil.rmtree(retry_staging)

                LOG.info("Retry complete: %d imported, %d still failed", retry_imported, len(retry_skipped_media))
                print(f"Retry complete: {retry_imported} imported, {len(retry_skipped_media)} still failed")

                # Reprint final statistics
                stats.print_summary()
                stats.log_summary()

        LOG.info(
            "Successfully imported %d media file(s) into Apple Photos.",
            imported_count,
        )
        if args.delete:
            cleanup_staging(staging_root)
        else:
            LOG.debug("Staging folder retained at %s", staging_root)
        # Keep the skip log only when it actually recorded something.
        if skip_log and skip_log.exists():
            if skip_logger and skip_logger.has_entries():
                LOG.info("Skipped file log saved at %s", skip_log)
            else:
                skip_log.unlink()
        print(f"\nDetailed log: {log_path}")
        return 0
    except KeyboardInterrupt:
        # Graceful handling of Ctrl+C - save logs and exit cleanly
        LOG.warning("Operation interrupted by user (Ctrl+C)")
        print("\n\n" + "=" * 60)
        print("INTERRUPTED: Operation cancelled by user (Ctrl+C)")
        print("=" * 60)
        # Save skip log if it has entries
        if skip_log and skip_log.exists():
            if skip_logger and skip_logger.has_entries():
                LOG.info("Skipped file log saved at %s", skip_log)
                print(f"Skip log saved: {skip_log}")
            else:
                skip_log.unlink()
        # Point to detailed log
        if "log_path" in locals():
            LOG.info("Detailed log saved at %s", log_path)
            print(f"Detailed log: {log_path}")
        # Preserve staging folder for potential resume - don't revert
        if staging_root and staging_root.exists():
            print(f"Staging folder preserved: {staging_root}")
            print("(Files can be manually imported or removed)")
        print("=" * 60)
        return 130  # Standard exit code for Ctrl+C (128 + SIGINT=2)
    except Exception as exc:  # noqa: BLE001
        LOG.error("Error: %s", exc)
        # On any other failure, move staged files back where they came from.
        revert_media_files(media_files, staging_root)
        if skip_log and skip_log.exists():
            if skip_logger and skip_logger.has_entries():
                LOG.info("Skipped file log saved at %s", skip_log)
            else:
                skip_log.unlink()
        if "log_path" in locals():
            print(f"See detailed log: {log_path}")
        return 1
    finally:
        # Persist any unknown-format discoveries regardless of exit path.
        if UNKNOWN_MAPPINGS.has_entries():
            updates_path = UNKNOWN_MAPPINGS.write_updates(Path.cwd())
            if updates_path:
                print(f"Unknown format mappings saved to {updates_path}")
4901
+
4902
+
4903
def run() -> None:
    """Console-script entry point; terminates the process with main()'s exit code."""
    raise SystemExit(main())
4905
+
4906
+
4907
class ProgressReporter:
    """Console progress indicator.

    With a known positive total it renders a bar with percentage and an ETA
    estimate; with a total of zero it falls back to a simple running count
    ("dynamic" mode).
    """

    def __init__(self, total: int, label: str) -> None:
        self.total = max(total, 0)
        self.label = label
        self.start = time.time()
        self.completed = 0
        self.last_render = 0.0
        # Unknown totals (0) switch to the processed-count display.
        self.dynamic = self.total == 0

    def update(self, step: int = 1, force: bool = False) -> None:
        """Advance the counter by *step* and redraw the indicator.

        Bar-mode redraws are throttled to roughly ten per second unless
        *force* is set or the bar has reached its total.
        """
        self.completed += step
        now = time.time()
        throttled = (
            not force
            and now - self.last_render < 0.1
            and not self.dynamic
            and self.completed < self.total
        )
        if throttled:
            return
        self.last_render = now
        if self.dynamic:
            sys.stdout.write(f"\r{self.label}: processed {self.completed}")
        else:
            self._draw_bar(now)
        sys.stdout.flush()

    def _draw_bar(self, now: float) -> None:
        # Render the fixed-width bar plus percentage and ETA.
        fraction = min(self.completed / self.total if self.total else 1.0, 1.0)
        elapsed = now - self.start
        rate = self.completed / elapsed if elapsed > 0 else 0
        remaining = (self.total - self.completed) / rate if rate > 0 else float("inf")
        width = 30
        done = int(width * fraction)
        bar = "#" * done + "-" * (width - done)
        eta = "--:--" if remaining == float("inf") else time.strftime("%M:%S", time.gmtime(int(remaining)))
        sys.stdout.write(f"\r{self.label}: [{bar}] {fraction * 100:5.1f}% ETA {eta}")

    def finish(self) -> None:
        """Snap to 100% (known totals), force a final redraw, and end the line."""
        if not self.dynamic:
            self.completed = self.total
        self.update(step=0, force=True)
        sys.stdout.write("\n")
        sys.stdout.flush()