smart-media-manager 0.5.43a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smart_media_manager/__init__.py +15 -0
- smart_media_manager/cli.py +4941 -0
- smart_media_manager/format_compatibility.json +628 -0
- smart_media_manager/format_registry.json +11874 -0
- smart_media_manager/format_registry.py +491 -0
- smart_media_manager/format_rules.py +677 -0
- smart_media_manager/metadata_registry.json +1113 -0
- smart_media_manager/metadata_registry.py +229 -0
- smart_media_manager/uuid_generator.py +140 -0
- smart_media_manager-0.5.43a4.dist-info/METADATA +340 -0
- smart_media_manager-0.5.43a4.dist-info/RECORD +15 -0
- smart_media_manager-0.5.43a4.dist-info/WHEEL +5 -0
- smart_media_manager-0.5.43a4.dist-info/entry_points.txt +2 -0
- smart_media_manager-0.5.43a4.dist-info/licenses/LICENSE +21 -0
- smart_media_manager-0.5.43a4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,4941 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import datetime as dt
|
|
5
|
+
import logging
|
|
6
|
+
import math
|
|
7
|
+
import mimetypes
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import shutil
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
import tempfile
|
|
14
|
+
import time
|
|
15
|
+
import unicodedata
|
|
16
|
+
import uuid
|
|
17
|
+
from collections import Counter
|
|
18
|
+
from contextlib import suppress
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any, Callable, Iterable, Optional
|
|
22
|
+
|
|
23
|
+
import json
|
|
24
|
+
|
|
25
|
+
import filetype # type: ignore[import-untyped]
|
|
26
|
+
import puremagic
|
|
27
|
+
from PIL import Image
|
|
28
|
+
from isbinary import is_binary_file
|
|
29
|
+
from smart_media_manager import __version__
|
|
30
|
+
from smart_media_manager.format_rules import FormatRule, match_rule
|
|
31
|
+
from smart_media_manager import format_registry
|
|
32
|
+
from smart_media_manager import metadata_registry
|
|
33
|
+
from pyfsig import interface as pyfsig_interface # type: ignore[import-untyped]
|
|
34
|
+
import rawpy # type: ignore[import-untyped]
|
|
35
|
+
|
|
36
|
+
# python-magic requires libmagic system library (installed via Homebrew during bootstrap)
|
|
37
|
+
# Must be lazy-loaded so script can start and run bootstrap code
|
|
38
|
+
try:
|
|
39
|
+
import magic
|
|
40
|
+
except ImportError: # pragma: no cover - system dependency
|
|
41
|
+
magic = None # type: ignore[assignment]
|
|
42
|
+
|
|
43
|
+
# Module-wide logger; log records go to a file handler attached at runtime.
LOG = logging.getLogger("smart_media_manager")
# Active on-disk log handler; stays None until file logging is configured
# elsewhere in this module (read by _log_directory()).
_FILE_LOG_HANDLER: Optional[logging.Handler] = None
SMM_LOGS_SUBDIR = ".smm__runtime_logs_"  # Unique prefix for timestamped log directories (created in CWD, excluded from scanning)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _log_directory() -> Optional[Path]:
    """Return the directory containing the active log file, or None.

    None is returned when no file log handler is installed, or when the
    handler exposes no usable ``baseFilename`` attribute.
    """
    handler = _FILE_LOG_HANDLER
    if handler is None:
        return None
    base_filename = getattr(handler, "baseFilename", None)
    return Path(base_filename).resolve().parent if base_filename else None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
SAFE_NAME_PATTERN = re.compile(r"[^A-Za-z0-9_.-]")
|
|
58
|
+
MAX_APPLESCRIPT_CHARS = 20000 # Max characters for AppleScript arguments
|
|
59
|
+
MAX_SAFE_STEM_LENGTH = 120 # Max length for safe filename stems
|
|
60
|
+
MAX_PHOTOS_FILENAME_LENGTH = 60 # Apple Photos filename limit (including extension)
|
|
61
|
+
APPLE_PHOTOS_FOLDER_IMPORT_TIMEOUT = 1800 # seconds (30 min) - timeout for single folder import of large collections
|
|
62
|
+
|
|
63
|
+
STAGING_TOKEN_PREFIX = "__SMM"
|
|
64
|
+
STAGING_TOKEN_PATTERN = re.compile(r"SMM([A-Za-z0-9]+)")
|
|
65
|
+
MAX_IMAGE_PIXELS_UNSET = object()
|
|
66
|
+
|
|
67
|
+
# Namespace used to generate deterministic UUIDs for previously unknown mappings
|
|
68
|
+
UNKNOWN_UUID_NAMESPACE = uuid.UUID("9a3e9b14-25f0-4e37-bc8e-cc3ad0e59bce")
|
|
69
|
+
|
|
70
|
+
BINWALK_EXECUTABLE = shutil.which("binwalk")
|
|
71
|
+
|
|
72
|
+
_MAGIC_MIME = None
|
|
73
|
+
_MAGIC_DESC = None
|
|
74
|
+
|
|
75
|
+
TOOL_PRIORITY = [
|
|
76
|
+
"libmagic",
|
|
77
|
+
"binwalk",
|
|
78
|
+
"puremagic",
|
|
79
|
+
"pyfsig",
|
|
80
|
+
]
|
|
81
|
+
|
|
82
|
+
TOOL_WEIGHTS = {
|
|
83
|
+
"libmagic": 1.4,
|
|
84
|
+
"binwalk": 1.2,
|
|
85
|
+
"puremagic": 1.1,
|
|
86
|
+
"pyfsig": 1.0,
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
RAW_DEPENDENCY_GROUPS = {
|
|
90
|
+
"canon": {
|
|
91
|
+
"extensions": {".crw", ".cr2", ".cr3", ".crm", ".crx"},
|
|
92
|
+
"brew": ["libraw"],
|
|
93
|
+
"pip": ["rawpy"],
|
|
94
|
+
"cask": ["adobe-dng-converter"],
|
|
95
|
+
},
|
|
96
|
+
"nikon": {
|
|
97
|
+
"extensions": {".nef", ".nrw"},
|
|
98
|
+
"brew": ["libraw"],
|
|
99
|
+
"pip": ["rawpy"],
|
|
100
|
+
"cask": [],
|
|
101
|
+
},
|
|
102
|
+
"sony": {
|
|
103
|
+
"extensions": {".arw", ".srf", ".sr2"},
|
|
104
|
+
"brew": ["libraw"],
|
|
105
|
+
"pip": ["rawpy"],
|
|
106
|
+
"cask": [],
|
|
107
|
+
},
|
|
108
|
+
"fujifilm": {
|
|
109
|
+
"extensions": {".raf"},
|
|
110
|
+
"brew": ["libraw"],
|
|
111
|
+
"pip": ["rawpy"],
|
|
112
|
+
"cask": [],
|
|
113
|
+
},
|
|
114
|
+
"olympus": {
|
|
115
|
+
"extensions": {".orf"},
|
|
116
|
+
"brew": ["libraw"],
|
|
117
|
+
"pip": ["rawpy"],
|
|
118
|
+
"cask": [],
|
|
119
|
+
},
|
|
120
|
+
"panasonic": {
|
|
121
|
+
"extensions": {".rw2", ".raw"},
|
|
122
|
+
"brew": ["libraw"],
|
|
123
|
+
"pip": ["rawpy"],
|
|
124
|
+
"cask": [],
|
|
125
|
+
},
|
|
126
|
+
"pentax": {
|
|
127
|
+
"extensions": {".pef", ".dng"},
|
|
128
|
+
"brew": ["libraw"],
|
|
129
|
+
"pip": ["rawpy"],
|
|
130
|
+
"cask": [],
|
|
131
|
+
},
|
|
132
|
+
"leica": {
|
|
133
|
+
"extensions": {".dng", ".rwl"},
|
|
134
|
+
"brew": ["libraw"],
|
|
135
|
+
"pip": ["rawpy"],
|
|
136
|
+
"cask": ["adobe-dng-converter"],
|
|
137
|
+
},
|
|
138
|
+
"phaseone": {
|
|
139
|
+
"extensions": {".iiq", ".cap"},
|
|
140
|
+
"brew": ["libraw"],
|
|
141
|
+
"pip": ["rawpy"],
|
|
142
|
+
"cask": ["adobe-dng-converter"],
|
|
143
|
+
},
|
|
144
|
+
"hasselblad": {
|
|
145
|
+
"extensions": {".3fr", ".fff"},
|
|
146
|
+
"brew": ["libraw"],
|
|
147
|
+
"pip": ["rawpy"],
|
|
148
|
+
"cask": ["adobe-dng-converter"],
|
|
149
|
+
},
|
|
150
|
+
"sigma": {
|
|
151
|
+
"extensions": {".x3f"},
|
|
152
|
+
"brew": ["libraw", "libopenraw"],
|
|
153
|
+
"pip": ["rawpy"],
|
|
154
|
+
"cask": [],
|
|
155
|
+
},
|
|
156
|
+
"gopro": {
|
|
157
|
+
"extensions": {".gpr"},
|
|
158
|
+
"brew": ["libraw"],
|
|
159
|
+
"pip": ["rawpy"],
|
|
160
|
+
"cask": [],
|
|
161
|
+
},
|
|
162
|
+
"dji": {
|
|
163
|
+
"extensions": {".dng"},
|
|
164
|
+
"brew": ["libraw"],
|
|
165
|
+
"pip": ["rawpy"],
|
|
166
|
+
"cask": [],
|
|
167
|
+
},
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
# Reverse lookup: lowercase RAW file extension -> set of vendor group names.
# One extension can map to several groups (e.g. ".dng" is used by Pentax,
# Leica, and DJI in RAW_DEPENDENCY_GROUPS above).
RAW_EXTENSION_TO_GROUPS: dict[str, set[str]] = {}
for group_name, config in RAW_DEPENDENCY_GROUPS.items():
    for ext in config["extensions"]:
        normalized = ext.lower()
        RAW_EXTENSION_TO_GROUPS.setdefault(normalized, set()).add(group_name)

# Process-wide caches; filled in elsewhere in this module.
_BREW_PATH_CACHE: Optional[str] = None
_PIP_PACKAGE_CACHE: set[str] = set()
_INSTALLED_RAW_GROUPS: set[str] = set()
|
|
179
|
+
|
|
180
|
+
REQUIRED_BREW_PACKAGES = {
|
|
181
|
+
"ffmpeg": "ffmpeg",
|
|
182
|
+
"libjxl": "jpeg-xl",
|
|
183
|
+
"libheif": "libheif",
|
|
184
|
+
"imagemagick": "imagemagick",
|
|
185
|
+
"webp": "webp",
|
|
186
|
+
"exiftool": "exiftool",
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
IMAGE_EXTENSION_MAP = {
|
|
190
|
+
"jpeg": ".jpg",
|
|
191
|
+
"jpg": ".jpg",
|
|
192
|
+
"png": ".png",
|
|
193
|
+
"tiff": ".tiff",
|
|
194
|
+
"tif": ".tiff",
|
|
195
|
+
"gif": ".gif",
|
|
196
|
+
"bmp": ".bmp",
|
|
197
|
+
"webp": ".webp",
|
|
198
|
+
"heic": ".heic",
|
|
199
|
+
"heif": ".heic",
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
COMPATIBLE_IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".heic", ".tiff", ".gif"}
|
|
203
|
+
COMPATIBLE_VIDEO_CONTAINERS = {"mp4", "mov", "quicktime", "m4v"}
|
|
204
|
+
COMPATIBLE_VIDEO_CODECS = {
|
|
205
|
+
# H.264 / AVC
|
|
206
|
+
"h264",
|
|
207
|
+
"avc1",
|
|
208
|
+
# HEVC / H.265
|
|
209
|
+
"hevc",
|
|
210
|
+
"h265",
|
|
211
|
+
"hvc1",
|
|
212
|
+
# Apple ProRes Family (all variants supported by Photos)
|
|
213
|
+
"apco", # ProRes 422 Proxy
|
|
214
|
+
"apcs", # ProRes 422 LT
|
|
215
|
+
"apcn", # ProRes 422
|
|
216
|
+
"apch", # ProRes 422 HQ
|
|
217
|
+
"ap4h", # ProRes 4444
|
|
218
|
+
"ap4x", # ProRes 4444 XQ
|
|
219
|
+
# Note: ProRes RAW cannot be imported (requires Final Cut Pro)
|
|
220
|
+
}
|
|
221
|
+
COMPATIBLE_AUDIO_CODECS = {
|
|
222
|
+
"aac",
|
|
223
|
+
"mp3",
|
|
224
|
+
"alac",
|
|
225
|
+
"pcm_s16le",
|
|
226
|
+
"pcm_s24le",
|
|
227
|
+
"pcm_s16be",
|
|
228
|
+
"pcm_f32le",
|
|
229
|
+
"ac3",
|
|
230
|
+
"eac3",
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
ARCHIVE_EXTENSIONS = {
|
|
234
|
+
# Standard archives
|
|
235
|
+
"zip",
|
|
236
|
+
"rar",
|
|
237
|
+
"7z",
|
|
238
|
+
"tar",
|
|
239
|
+
"gz",
|
|
240
|
+
"bz2",
|
|
241
|
+
"xz",
|
|
242
|
+
"lz",
|
|
243
|
+
"lzma",
|
|
244
|
+
"zst", # Zstandard (used by Homebrew, etc.)
|
|
245
|
+
"zstd",
|
|
246
|
+
"cab",
|
|
247
|
+
"iso",
|
|
248
|
+
"tgz",
|
|
249
|
+
"tbz2",
|
|
250
|
+
"txz",
|
|
251
|
+
"cpio",
|
|
252
|
+
"sit", # StuffIt
|
|
253
|
+
"sitx",
|
|
254
|
+
# macOS packages/disk images
|
|
255
|
+
"dmg",
|
|
256
|
+
"pkg", # macOS installer package (XAR archive)
|
|
257
|
+
"xar", # eXtensible ARchive format
|
|
258
|
+
"mpkg", # macOS meta-package
|
|
259
|
+
"sparseimage",
|
|
260
|
+
"sparsebundle",
|
|
261
|
+
# Linux packages
|
|
262
|
+
"deb",
|
|
263
|
+
"rpm",
|
|
264
|
+
# Windows packages
|
|
265
|
+
"msi",
|
|
266
|
+
"msix",
|
|
267
|
+
"appx",
|
|
268
|
+
# Java/Android
|
|
269
|
+
"apk",
|
|
270
|
+
"jar",
|
|
271
|
+
"war",
|
|
272
|
+
"ear",
|
|
273
|
+
"aar", # Android library
|
|
274
|
+
# Browser extensions
|
|
275
|
+
"xpi", # Firefox/Mozilla extension
|
|
276
|
+
"crx", # Chrome extension
|
|
277
|
+
# Application packages (zip-based)
|
|
278
|
+
"apkg", # Anki flashcard package
|
|
279
|
+
"sketch", # Sketch design files
|
|
280
|
+
"figma",
|
|
281
|
+
# Office documents (zip-based XML)
|
|
282
|
+
"docx",
|
|
283
|
+
"xlsx",
|
|
284
|
+
"pptx",
|
|
285
|
+
"odt",
|
|
286
|
+
"ods",
|
|
287
|
+
"odp",
|
|
288
|
+
"odg",
|
|
289
|
+
# Ebooks
|
|
290
|
+
"epub",
|
|
291
|
+
"mobi",
|
|
292
|
+
"azw",
|
|
293
|
+
"azw3",
|
|
294
|
+
# ML/AI model files
|
|
295
|
+
"safetensors",
|
|
296
|
+
"gguf", # llama.cpp models
|
|
297
|
+
"onnx",
|
|
298
|
+
# Virtual disk images
|
|
299
|
+
"vhd",
|
|
300
|
+
"vhdx",
|
|
301
|
+
"vmdk",
|
|
302
|
+
"qcow2",
|
|
303
|
+
# Fonts
|
|
304
|
+
"ttf",
|
|
305
|
+
"otf",
|
|
306
|
+
"woff",
|
|
307
|
+
"woff2",
|
|
308
|
+
"eot",
|
|
309
|
+
"ttc", # TrueType Collection
|
|
310
|
+
# Executables (not archives but should skip)
|
|
311
|
+
"exe",
|
|
312
|
+
"dll",
|
|
313
|
+
"so",
|
|
314
|
+
"dylib",
|
|
315
|
+
# Documents
|
|
316
|
+
"pdf",
|
|
317
|
+
"rtf",
|
|
318
|
+
"doc", # Legacy Word
|
|
319
|
+
"xls", # Legacy Excel
|
|
320
|
+
"ppt", # Legacy PowerPoint
|
|
321
|
+
# Icon files (not importable into Photos)
|
|
322
|
+
"icns", # macOS icon
|
|
323
|
+
"ico", # Windows icon
|
|
324
|
+
"cur", # Windows cursor
|
|
325
|
+
"ani", # Windows animated cursor
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
ARCHIVE_MIME_TYPES = {
|
|
329
|
+
# Standard archives
|
|
330
|
+
"application/zip",
|
|
331
|
+
"application/x-zip-compressed",
|
|
332
|
+
"application/x-7z-compressed",
|
|
333
|
+
"application/x-tar",
|
|
334
|
+
"application/x-rar",
|
|
335
|
+
"application/x-rar-compressed",
|
|
336
|
+
"application/vnd.rar",
|
|
337
|
+
"application/gzip",
|
|
338
|
+
"application/x-gzip",
|
|
339
|
+
"application/x-bzip2",
|
|
340
|
+
"application/x-xz",
|
|
341
|
+
"application/x-lzip",
|
|
342
|
+
"application/x-lzma",
|
|
343
|
+
"application/zstd",
|
|
344
|
+
"application/x-cpio",
|
|
345
|
+
"application/x-stuffit",
|
|
346
|
+
"application/x-stuffitx",
|
|
347
|
+
# Disk images
|
|
348
|
+
"application/x-iso9660-image",
|
|
349
|
+
"application/x-apple-diskimage",
|
|
350
|
+
# Packages
|
|
351
|
+
"application/x-xar", # macOS XAR archive (.pkg, .xar)
|
|
352
|
+
"application/vnd.android.package-archive",
|
|
353
|
+
"application/java-archive",
|
|
354
|
+
"application/x-debian-package",
|
|
355
|
+
"application/x-rpm",
|
|
356
|
+
"application/x-msi",
|
|
357
|
+
# Office documents
|
|
358
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
359
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
360
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
361
|
+
"application/vnd.oasis.opendocument.text",
|
|
362
|
+
"application/vnd.oasis.opendocument.spreadsheet",
|
|
363
|
+
"application/vnd.oasis.opendocument.presentation",
|
|
364
|
+
# Ebooks
|
|
365
|
+
"application/epub+zip",
|
|
366
|
+
"application/x-mobipocket-ebook",
|
|
367
|
+
# Fonts
|
|
368
|
+
"font/otf",
|
|
369
|
+
"font/ttf",
|
|
370
|
+
"font/woff",
|
|
371
|
+
"font/woff2",
|
|
372
|
+
"application/font-sfnt",
|
|
373
|
+
"application/x-font-ttf",
|
|
374
|
+
"application/x-font-otf",
|
|
375
|
+
# Documents
|
|
376
|
+
"application/pdf",
|
|
377
|
+
"application/rtf",
|
|
378
|
+
"application/msword",
|
|
379
|
+
"application/vnd.ms-excel",
|
|
380
|
+
"application/vnd.ms-powerpoint",
|
|
381
|
+
# Executables
|
|
382
|
+
"application/x-msdownload",
|
|
383
|
+
"application/x-executable",
|
|
384
|
+
"application/x-mach-binary",
|
|
385
|
+
"application/x-sharedlib",
|
|
386
|
+
# Icon files (not importable into Photos)
|
|
387
|
+
"image/x-icon", # Windows .ico
|
|
388
|
+
"image/vnd.microsoft.icon", # Windows .ico (alternative)
|
|
389
|
+
"image/x-icns", # macOS .icns
|
|
390
|
+
"application/x-icns", # macOS .icns (alternative)
|
|
391
|
+
"image/x-win-bitmap", # Windows cursor
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
NON_MEDIA_REASON_KEYWORDS = (
|
|
395
|
+
"archive",
|
|
396
|
+
"unsupported format",
|
|
397
|
+
"format not identified",
|
|
398
|
+
"non-media",
|
|
399
|
+
"uuid detection failed",
|
|
400
|
+
"rawpy unsupported",
|
|
401
|
+
"document",
|
|
402
|
+
"pdf",
|
|
403
|
+
"installer",
|
|
404
|
+
"binary check failed",
|
|
405
|
+
"icon", # Icon files (.icns, .ico, .cur, .ani)
|
|
406
|
+
"cursor", # Cursor files
|
|
407
|
+
)
|
|
408
|
+
|
|
409
|
+
TEXTUAL_MIME_HINTS = {
|
|
410
|
+
"application/x-typescript",
|
|
411
|
+
"application/javascript",
|
|
412
|
+
"application/x-javascript",
|
|
413
|
+
"application/json",
|
|
414
|
+
"application/xml",
|
|
415
|
+
"text/javascript",
|
|
416
|
+
"text/typescript",
|
|
417
|
+
"text/x-python",
|
|
418
|
+
"text/x-shellscript",
|
|
419
|
+
"text/x-c",
|
|
420
|
+
"text/x-c++",
|
|
421
|
+
"text/x-go",
|
|
422
|
+
"text/x-ruby",
|
|
423
|
+
"text/x-php",
|
|
424
|
+
"text/markdown",
|
|
425
|
+
"text/plain",
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
TEXT_ONLY_HINT_EXTENSIONS = {
|
|
429
|
+
".ts",
|
|
430
|
+
".tsx",
|
|
431
|
+
".js",
|
|
432
|
+
".jsx",
|
|
433
|
+
".mjs",
|
|
434
|
+
".cjs",
|
|
435
|
+
".py",
|
|
436
|
+
".pyw",
|
|
437
|
+
".java",
|
|
438
|
+
".cs",
|
|
439
|
+
".c",
|
|
440
|
+
".cc",
|
|
441
|
+
".cpp",
|
|
442
|
+
".h",
|
|
443
|
+
".hpp",
|
|
444
|
+
".go",
|
|
445
|
+
".rs",
|
|
446
|
+
".rb",
|
|
447
|
+
".php",
|
|
448
|
+
".sh",
|
|
449
|
+
".bash",
|
|
450
|
+
".zsh",
|
|
451
|
+
".ps1",
|
|
452
|
+
".bat",
|
|
453
|
+
".sql",
|
|
454
|
+
".swift",
|
|
455
|
+
".kt",
|
|
456
|
+
".json",
|
|
457
|
+
".yml",
|
|
458
|
+
".yaml",
|
|
459
|
+
".toml",
|
|
460
|
+
".ini",
|
|
461
|
+
".cfg",
|
|
462
|
+
".conf",
|
|
463
|
+
".md",
|
|
464
|
+
".rst",
|
|
465
|
+
".txt",
|
|
466
|
+
".log",
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
VIDEO_EXTENSION_MAP = {
|
|
470
|
+
"mp4": ".mp4",
|
|
471
|
+
"m4v": ".m4v",
|
|
472
|
+
"mov": ".mov",
|
|
473
|
+
"qt": ".mov",
|
|
474
|
+
"avi": ".avi",
|
|
475
|
+
"mkv": ".mkv",
|
|
476
|
+
"webm": ".webm",
|
|
477
|
+
"flv": ".flv",
|
|
478
|
+
"wmv": ".wmv",
|
|
479
|
+
"mpg": ".mpg",
|
|
480
|
+
"mpeg": ".mpg",
|
|
481
|
+
"3gp": ".3gp",
|
|
482
|
+
"3g2": ".3g2",
|
|
483
|
+
"ts": ".ts",
|
|
484
|
+
"m2ts": ".ts",
|
|
485
|
+
"mts": ".ts",
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
VIDEO_EXTENSION_HINTS = set(VIDEO_EXTENSION_MAP.keys())
|
|
489
|
+
|
|
490
|
+
VIDEO_MIME_EXTENSION_MAP = {
|
|
491
|
+
"video/mp4": ".mp4",
|
|
492
|
+
"video/x-m4v": ".m4v",
|
|
493
|
+
"video/quicktime": ".mov",
|
|
494
|
+
"video/x-quicktime": ".mov",
|
|
495
|
+
"video/x-msvideo": ".avi",
|
|
496
|
+
"video/x-matroska": ".mkv",
|
|
497
|
+
"video/webm": ".webm",
|
|
498
|
+
"video/x-flv": ".flv",
|
|
499
|
+
"video/x-ms-wmv": ".wmv",
|
|
500
|
+
"video/mpeg": ".mpg",
|
|
501
|
+
"video/MP2T": ".ts",
|
|
502
|
+
"video/3gpp": ".3gp",
|
|
503
|
+
"video/3gpp2": ".3g2",
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
IMAGE_MIME_EXTENSION_MAP = {
|
|
507
|
+
"image/jpeg": ".jpg",
|
|
508
|
+
"image/png": ".png",
|
|
509
|
+
"image/tiff": ".tiff",
|
|
510
|
+
"image/gif": ".gif",
|
|
511
|
+
"image/bmp": ".bmp",
|
|
512
|
+
"image/webp": ".webp",
|
|
513
|
+
"image/heif": ".heic",
|
|
514
|
+
"image/heic": ".heic",
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
ALL_IMAGE_EXTENSIONS = set(IMAGE_EXTENSION_MAP.keys())
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
@dataclass
class MediaFile:
    """One discovered media file plus its detection results and the
    processing decisions made for it during a run."""

    source: Path  # Original location on disk
    kind: str  # Broad media category token (as produced by detection)
    extension: str  # Canonical extension chosen for the file
    format_name: str  # Detected format identifier
    stage_path: Optional[Path] = None  # Location once copied into staging, if any
    compatible: bool = False  # Apple Photos compatibility after any conversion
    video_codec: Optional[str] = None  # Probed video codec, when applicable
    audio_codec: Optional[str] = None  # Probed audio codec, when applicable
    audio_sample_rate: Optional[int] = None
    audio_sample_fmt: Optional[str] = None
    original_suffix: str = ""  # Suffix the source file carried before normalization
    rule_id: str = ""  # Identifier of the matched format rule, if any
    action: str = "import"  # Planned handling for this file
    requires_processing: bool = False  # True when a conversion/rewrap is needed
    was_converted: bool = False  # Tracks if file was actually converted (for stats)
    notes: str = ""  # Free-form notes accumulated during detection/processing
    metadata: dict[str, Any] = field(default_factory=dict)  # Extra per-file details
    detected_compatible: bool = False  # Detection-time compatibility prior to conversions
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
@dataclass
class SkipLogger:
    """Appends skipped-file records to a tab-separated log file."""

    path: Path  # Destination log file (one "<path>\t<reason>" line per record)
    entries: int = 0  # Number of records written so far

    def log(self, file_path: Path, reason: str) -> None:
        """Record one skipped file unless the reason marks it as non-media."""
        # Non-media skips are dropped outright to keep the log from exploding.
        if reason.lower().startswith("non-media"):
            return
        # Debug-level only: skip messages go to the log file, not the console.
        LOG.debug("Skipping %s (%s)", file_path, reason)
        record = f"{file_path}\t{reason}\n"
        with self.path.open("a", encoding="utf-8") as sink:
            sink.write(record)
        self.entries += 1

    def has_entries(self) -> bool:
        """Return True once at least one record has been written."""
        return bool(self.entries)
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
class UnknownMappingCollector:
    """Collects missing format UUID mappings and emits an update JSON.

    The collector keeps only one sample per (tool, token, kind) triple
    to avoid bloating memory when thousands of files share the same
    missing mapping.
    """

    def __init__(self) -> None:
        # Maps (tool, token, kind) -> string path of one sample file.
        self._entries: dict[tuple[str, str, str], str] = {}

    def register(self, tool: str, token: str, kind: str, sample: Path) -> None:
        """Record a missing mapping; only the first sample per triple is kept."""
        key = (tool, token, kind)
        if key not in self._entries:
            self._entries[key] = str(sample)
            LOG.info("Captured missing UUID mapping: %s -> %s (%s)", tool, token, kind)

    def has_entries(self) -> bool:
        """Return True when at least one missing mapping has been captured."""
        return bool(self._entries)

    def _generated_uuid(self, token: str, kind: str) -> str:
        """Derive a deterministic UUID string for an unknown (kind, token) pair.

        uuid5 over a fixed namespace guarantees the same pair always yields
        the same identifier; a one-letter kind suffix is appended so the
        media kind is visible in the mapped value.
        """
        suffix = {
            "video": "V",
            "audio": "A",
            "image": "I",
            "container": "C",
        }.get(kind, "U")
        base = uuid.uuid5(UNKNOWN_UUID_NAMESPACE, f"{kind}:{token}")
        return f"{base}-{suffix}"

    def write_updates(self, output_dir: Path) -> Optional[Path]:
        """Write captured mappings as a registry-update JSON file in *output_dir*.

        Returns the path of the written file, or None when there is nothing
        to write or the write fails (the failure is logged, not raised).
        """
        if not self._entries:
            return None

        # Skeleton mirroring the format-registry JSON layout.
        update: dict[str, Any] = {
            "format_names": {},
            "tool_mappings": {},
            "apple_photos_compatible": {
                "images": {"needs_conversion": []},
                "videos": {
                    "needs_rewrap": [],
                    "needs_transcode_video": [],
                    "needs_transcode_audio": [],
                    "compatible_containers": [],
                    "compatible_video_codecs": [],
                },
            },
            "generated_from": "smart-media-manager auto-run",
        }

        for (tool, token, kind), sample in sorted(self._entries.items()):
            mapped_uuid = self._generated_uuid(token, kind)
            update.setdefault("tool_mappings", {}).setdefault(tool, {})[token] = mapped_uuid
            update["format_names"][mapped_uuid] = {
                "canonical": token,
                "extensions": [],
                "kind": kind,
                "sample": sample,
            }

            # Unknown formats are conservatively routed to a conversion bucket.
            if kind == "video":
                update["apple_photos_compatible"]["videos"]["needs_transcode_video"].append(mapped_uuid)
            elif kind == "audio":
                update["apple_photos_compatible"]["videos"]["needs_transcode_audio"].append(mapped_uuid)
            elif kind == "image":
                update["apple_photos_compatible"]["images"]["needs_conversion"].append(mapped_uuid)

        run_ts = timestamp()
        out_path = output_dir / f"format_registry_updates_{run_ts}.json"
        try:
            with out_path.open("w", encoding="utf-8") as handle:
                json.dump(update, handle, indent=2, sort_keys=True)
            LOG.info("Wrote %d missing mapping(s) to %s", len(self._entries), out_path)
            return out_path
        except Exception as exc:  # noqa: BLE001
            LOG.error("Failed to write format registry updates: %s", exc)
            return None
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
# Global collector shared across the run
|
|
642
|
+
UNKNOWN_MAPPINGS = UnknownMappingCollector()
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
@dataclass
class RunStatistics:
    """Tracks comprehensive statistics for a Smart Media Manager run."""

    # Scanning counters
    total_files_scanned: int = 0
    total_binary_files: int = 0
    total_text_files: int = 0
    # Detection counters
    total_media_detected: int = 0
    media_compatible: int = 0
    media_incompatible: int = 0
    incompatible_with_conversion_rule: int = 0
    # Conversion counters
    conversion_attempted: int = 0
    conversion_succeeded: int = 0
    conversion_failed: int = 0
    # Import counters
    imported_after_conversion: int = 0
    imported_without_conversion: int = 0
    total_imported: int = 0
    refused_by_apple_photos: int = 0
    refused_filenames: list[tuple[Path, str]] = field(default_factory=list)  # (path, reason) pairs
    # Skip counters
    skipped_errors: int = 0
    skipped_unknown_format: int = 0
    skipped_corrupt_or_empty: int = 0
    skipped_non_media: int = 0
    skipped_other: int = 0
    # Staging-folder counts (actual vs. expected file totals)
    staging_total: int = 0
    staging_expected: int = 0

    def print_summary(self) -> None:
        """Print a colored, formatted summary of the run statistics."""
        # ANSI color codes
        BOLD = "\033[1m"
        GREEN = "\033[92m"
        YELLOW = "\033[93m"
        RED = "\033[91m"
        BLUE = "\033[94m"
        CYAN = "\033[96m"
        RESET = "\033[0m"

        print(f"\n{BOLD}{'=' * 80}{RESET}")
        print(f"{BOLD}{CYAN}Smart Media Manager - Run Summary{RESET}")
        print(f"{BOLD}{'=' * 80}{RESET}\n")

        # Scanning section
        print(f"{BOLD}{BLUE}Scanning:{RESET}")
        print(f" Total files scanned: {self.total_files_scanned:>6}")
        print(f" Binary files: {self.total_binary_files:>6}")
        print(f" Text files: {self.total_text_files:>6}\n")

        # Detection section
        print(f"{BOLD}{BLUE}Media Detection:{RESET}")
        print(f" Media files detected: {self.total_media_detected:>6}")
        print(f" Compatible (no conversion): {GREEN}{self.media_compatible:>6}{RESET}")
        print(f" Incompatible: {YELLOW}{self.media_incompatible:>6}{RESET}")
        print(f" └─ With conversion rule: {self.incompatible_with_conversion_rule:>6}\n")

        # Conversion section (omitted entirely when nothing was attempted)
        if self.conversion_attempted > 0:
            print(f"{BOLD}{BLUE}Conversion:{RESET}")
            print(f" Attempted: {self.conversion_attempted:>6}")
            print(f" Succeeded: {GREEN}{self.conversion_succeeded:>6}{RESET}")
            print(f" Failed: {RED}{self.conversion_failed:>6}{RESET}\n")

        # Import section
        print(f"{BOLD}{BLUE}Apple Photos Import:{RESET}")
        print(f" Imported (after conversion):{GREEN}{self.imported_after_conversion:>6}{RESET}")
        print(f" Imported (direct): {GREEN}{self.imported_without_conversion:>6}{RESET}")
        print(f" Total imported: {BOLD}{GREEN}{self.total_imported:>6}{RESET}")
        print(f" Refused by Apple Photos: {RED}{self.refused_by_apple_photos:>6}{RESET}")

        # Success rate only makes sense when at least one import was attempted.
        if self.total_imported + self.refused_by_apple_photos > 0:
            success_rate = (self.total_imported / (self.total_imported + self.refused_by_apple_photos)) * 100
            color = GREEN if success_rate >= 95 else YELLOW if success_rate >= 80 else RED
            print(f" Success rate: {color}{success_rate:>5.1f}%{RESET}\n")
        else:
            print()

        # Skipped section (omitted when nothing was skipped)
        total_skipped = self.skipped_errors + self.skipped_unknown_format + self.skipped_corrupt_or_empty + self.skipped_non_media + self.skipped_other
        if total_skipped > 0:
            print(f"{BOLD}{BLUE}Skipped Files:{RESET}")
            print(f" Due to errors: {self.skipped_errors:>6}")
            print(f" Unknown format: {self.skipped_unknown_format:>6}")
            print(f" Corrupt or empty: {self.skipped_corrupt_or_empty:>6}")
            if self.skipped_non_media:
                print(f" Non-media files: {self.skipped_non_media:>6}")
            print(f" Other reasons: {self.skipped_other:>6}")
            print(f" Total skipped: {YELLOW}{total_skipped:>6}{RESET}\n")

        print(f" Total Files In The STAGING FOLDER: {self.staging_total:>6}")
        print(f" Expected Files In The STAGING FOLDER: {self.staging_expected:>6}\n")

        # Failed imports detail (first 10 only; full list goes to the log)
        if self.refused_filenames:
            print(f"{BOLD}{RED}Files Refused by Apple Photos:{RESET}")
            for path, reason in self.refused_filenames[:10]:  # Show first 10
                print(f" • {path.name}")
                print(f" Reason: {reason}")
            if len(self.refused_filenames) > 10:
                print(f" ... and {len(self.refused_filenames) - 10} more (see log for full list)\n")
            else:
                print()

        print(f"{BOLD}{'=' * 80}{RESET}\n")

    def log_summary(self) -> None:
        """Log the summary to the file logger."""
        LOG.info("=" * 80)
        LOG.info("Run Summary Statistics")
        LOG.info("=" * 80)
        LOG.info(
            "Scanning: total=%d, binary=%d, text=%d",
            self.total_files_scanned,
            self.total_binary_files,
            self.total_text_files,
        )
        LOG.info(
            "Media Detection: detected=%d, compatible=%d, incompatible=%d (with_rule=%d)",
            self.total_media_detected,
            self.media_compatible,
            self.media_incompatible,
            self.incompatible_with_conversion_rule,
        )
        LOG.info(
            "Conversion: attempted=%d, succeeded=%d, failed=%d",
            self.conversion_attempted,
            self.conversion_succeeded,
            self.conversion_failed,
        )
        LOG.info(
            "Import: converted=%d, direct=%d, total=%d, refused=%d",
            self.imported_after_conversion,
            self.imported_without_conversion,
            self.total_imported,
            self.refused_by_apple_photos,
        )
        if self.total_imported + self.refused_by_apple_photos > 0:
            success_rate = (self.total_imported / (self.total_imported + self.refused_by_apple_photos)) * 100
            LOG.info("Success rate: %.1f%%", success_rate)
        LOG.info(
            "Skipped: errors=%d, unknown=%d, corrupt=%d, non_media=%d, other=%d",
            self.skipped_errors,
            self.skipped_unknown_format,
            self.skipped_corrupt_or_empty,
            self.skipped_non_media,
            self.skipped_other,
        )
        LOG.info("Staging: total=%d, expected=%d", self.staging_total, self.staging_expected)
        if self.refused_filenames:
            LOG.info("Refused files:")
            for path, reason in self.refused_filenames:
                LOG.info(" %s: %s", path, reason)
        LOG.info("=" * 80)
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
@dataclass
class FormatVote:
    """A single detection tool's opinion about one file's format."""

    tool: str  # Detection tool that produced this vote (see TOOL_PRIORITY)
    mime: Optional[str] = None  # MIME type guess, if the tool provided one
    extension: Optional[str] = None  # Extension guess, if the tool provided one
    description: Optional[str] = None  # Human-readable description from the tool
    kind: Optional[str] = None  # Media kind classification, if determined
    error: Optional[str] = None  # Populated when the tool failed on the file
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
def find_executable(*candidates: str) -> Optional[str]:
    """Return the resolved path of the first candidate found on PATH.

    Candidates are checked in order via shutil.which; None is returned
    when none of them resolve (or no candidates were given).
    """
    return next((found for found in map(shutil.which, candidates) if found), None)
|
|
815
|
+
|
|
816
|
+
|
|
817
|
+
def resolve_imagemagick_command() -> str:
    """Return the ImageMagick CLI path, preferring `magick` over `convert`.

    Raises:
        RuntimeError: when neither binary is available on PATH.
    """
    cmd = find_executable("magick", "convert")
    if cmd:
        return cmd
    raise RuntimeError("ImageMagick (magick/convert) not found. Please install imagemagick.")
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
def ensure_ffmpeg_path() -> str:
    """Return the ffmpeg binary path, raising RuntimeError when absent.

    Raises:
        RuntimeError: when ffmpeg is not available on PATH.
    """
    cmd = find_executable("ffmpeg")
    if cmd:
        return cmd
    raise RuntimeError("ffmpeg not found. Please install ffmpeg.")
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
def is_animated_gif(path: Path) -> bool:
    """Heuristically detect an animated GIF; False on read errors.

    Requires both the NETSCAPE2.0 looping application extension and more
    than one 0x2C byte (the GIF image-separator marker). Counting raw
    bytes is a heuristic, not a full GIF parse.
    """
    try:
        data = path.read_bytes()
    except OSError:
        return False
    has_loop_extension = b"NETSCAPE2.0" in data
    multiple_frames = data.count(b"\x2c") > 1
    return multiple_frames and has_loop_extension
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
def is_animated_png(path: Path) -> bool:
    """Return True when the file contains an APNG animation-control chunk.

    Looks for the "acTL" chunk tag anywhere in the file; returns False
    on read errors.
    """
    try:
        data = path.read_bytes()
    except OSError:
        return False
    return data.find(b"acTL") != -1
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
def is_animated_webp(path: Path) -> bool:
    """Return True when an ANIM chunk appears in the first 64 KiB.

    Only the file head is read since the ANIM chunk sits near the start
    of a RIFF/WEBP container; returns False on read errors.
    """
    try:
        with path.open("rb") as reader:
            head = reader.read(65536)
    except OSError:
        return False
    return head.find(b"ANIM") >= 0
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
def get_psd_color_mode(path: Path) -> Optional[str]:
    """Read the color-mode field from a Photoshop (PSD) file header.

    Returns one of "bitmap", "grayscale", "indexed", "rgb", "cmyk", "lab",
    "multichannel" or "duotone"; None when the file is unreadable, lacks the
    "8BPS" signature, is shorter than a full 26-byte header, or uses an
    unknown mode code.
    """
    mode_names = {
        0: "bitmap",
        1: "grayscale",
        2: "indexed",
        3: "rgb",
        4: "cmyk",
        7: "lab",
        8: "multichannel",
        9: "duotone",
    }
    try:
        with path.open("rb") as fh:
            header = fh.read(26)
    except OSError:
        return None
    if len(header) < 26 or not header.startswith(b"8BPS"):
        return None
    # Color mode is a big-endian uint16 at offset 24.
    return mode_names.get(int.from_bytes(header[24:26], "big"))
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
@dataclass
class Signature:
    """A detected (extension, MIME) pair for a file; either part may be absent."""

    extension: Optional[str] = None  # detected file extension, if any
    mime: Optional[str] = None  # detected MIME type, if any

    def is_empty(self) -> bool:
        """Return True when neither an extension nor a MIME type was detected."""
        return not self.extension and not self.mime
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
def normalize_extension(ext: Optional[str]) -> Optional[str]:
    """Lower-case *ext*, trim whitespace and drop a single leading dot.

    Returns None for None/blank input. Note: a bare "." normalizes to the
    empty string (not None) — this matches the original behavior.
    """
    if not ext:
        return None
    cleaned = ext.strip().lower()
    if not cleaned:
        return None
    return cleaned[1:] if cleaned.startswith(".") else cleaned
|
|
898
|
+
|
|
899
|
+
|
|
900
|
+
def looks_like_text_file(path: Path, max_bytes: int = 4096) -> bool:
    """Heuristically decide whether *path* looks like plain text.

    Reads up to *max_bytes* bytes. An empty file counts as text; any NUL
    byte marks it binary; otherwise more than 90% of the sample must be
    printable ASCII (plus tab/LF/CR). Unreadable files report False.
    """
    try:
        with path.open("rb") as fh:
            sample = fh.read(max_bytes)
    except OSError:
        return False
    if not sample:
        return True
    if 0 in sample:
        return False
    allowed = set(range(32, 127)) | {9, 10, 13}
    printable_count = sum(byte in allowed for byte in sample)
    return printable_count / len(sample) > 0.9
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
def timestamp() -> str:
    """Return the current local time as a 14-digit YYYYMMDDHHMMSS string."""
    now = dt.datetime.now()
    return now.strftime("%Y%m%d%H%M%S")
|
|
916
|
+
|
|
917
|
+
|
|
918
|
+
def tool_rank(tool: str) -> int:
    """Return *tool*'s position in TOOL_PRIORITY; unknown tools rank last."""
    if tool in TOOL_PRIORITY:
        return TOOL_PRIORITY.index(tool)
    return len(TOOL_PRIORITY)
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
def vote_weight(vote: FormatVote) -> float:
    """Return the confidence weight configured for the vote's tool (default 1.0)."""
    return TOOL_WEIGHTS.get(vote.tool, 1.0)
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
def collect_raw_groups_from_extensions(exts: Iterable[Optional[str]]) -> set[str]:
    """Gather every RAW dependency group implied by the given extensions.

    Each extension is normalized to dotted lower-case form first; empty or
    unknown extensions contribute nothing.
    """
    found: set[str] = set()
    for raw_ext in exts:
        dotted = ensure_dot_extension(raw_ext)
        if dotted:
            found |= RAW_EXTENSION_TO_GROUPS.get(dotted.lower(), set())
    return found
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
def is_raw_extension(ext: Optional[str]) -> bool:
    """Return True when *ext* maps to a known camera-RAW dependency group."""
    dotted = ensure_dot_extension(ext)
    if not dotted:
        return False
    return dotted.lower() in RAW_EXTENSION_TO_GROUPS
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
def install_raw_dependency_groups(groups: Iterable[str]) -> None:
    """Install Homebrew dependencies for any RAW groups not yet handled.

    Only groups missing from the module-level _INSTALLED_RAW_GROUPS cache are
    processed; for each, the configured brew packages and casks are
    installed, then the cache is updated so repeat calls become no-ops.
    """
    needed = set(groups) - _INSTALLED_RAW_GROUPS
    if not needed:
        return
    brew_path = ensure_homebrew()
    for group in sorted(needed):
        config = RAW_DEPENDENCY_GROUPS.get(group)
        if not config:
            continue
        # Install system dependencies (Homebrew packages and casks)
        for package in config.get("brew", []):
            ensure_brew_package(brew_path, package)
        for cask in config.get("cask", []):
            ensure_brew_cask(brew_path, cask)
        # NOTE: Python packages (rawpy) are NOT installed at runtime
        # Users must install with: uv tool install smart-media-manager[enhanced]
        # Or manually: pip install rawpy
        # RAW files will be skipped if rawpy is unavailable (detected via import)
    _INSTALLED_RAW_GROUPS.update(needed)
|
|
963
|
+
|
|
964
|
+
|
|
965
|
+
def refine_raw_media(path: Path, extension_candidates: Iterable[Optional[str]]) -> tuple[Optional[MediaFile], Optional[str]]:
    """Build a MediaFile for a camera-RAW file, verifying it with rawpy.

    Opens the file with rawpy to confirm it is a readable RAW image and to
    derive a display name from the camera make/model metadata. The chosen
    extension is the first candidate recognised as a RAW extension, falling
    back to the file's own suffix (or ".raw").

    Returns:
        (media, None) on success, or (None, reason) when the file is not a
        supported RAW image or rawpy fails.
    """
    try:
        with rawpy.imread(str(path)) as raw:
            make = (raw.metadata.camera_make or "").strip()
            model = (raw.metadata.camera_model or "").strip()
            # Format name is "Make Model", or "raw" when both are blank.
            format_name = " ".join(part for part in [make, model] if part) or "raw"
    except rawpy.LibRawFileUnsupportedError:
        return None, "non-media: rawpy unsupported raw"
    except Exception as exc:  # pragma: no cover - safeguard
        return None, f"rawpy failed: {exc}"

    # Prefer the first candidate that is a known RAW extension.
    chosen_extension: Optional[str] = None
    for candidate in extension_candidates:
        normalized = ensure_dot_extension(candidate)
        if normalized and normalized.lower() in RAW_EXTENSION_TO_GROUPS:
            chosen_extension = normalized
            break
    if not chosen_extension:
        chosen_extension = ensure_dot_extension(path.suffix) or ".raw"

    media = MediaFile(
        source=path,
        kind="raw",
        extension=chosen_extension,
        format_name=format_name,
        compatible=True,
        original_suffix=path.suffix,
    )
    media.detected_compatible = media.compatible
    return media, None
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
def refine_image_media(media: MediaFile, skip_compatibility_check: bool = False) -> tuple[Optional[MediaFile], Optional[str]]:
    """
    FAST corruption detection for image files (<10ms for most images).

    Strategy:
    1. Format-specific quick checks (EOF markers) - microseconds
    2. PIL load() to decode pixels - catches truncation - milliseconds

    Args:
        media: MediaFile to validate
        skip_compatibility_check: If True, skip all validation (for testing)

    Returns:
        (media, None) when the image passes all checks, otherwise
        (None, reason) describing the first failure encountered.
    """
    # Skip all validation if flag is set (for format testing)
    if skip_compatibility_check:
        return media, None

    # FAST CHECK: Format-specific validation (very quick!)
    path = media.source

    # JPEG: Check SOI and EOI markers (2 reads, <1ms)
    # NOTE(review): JPEGs with trailing data after EOI would fail this check;
    # treated here as truncation/corruption by design.
    if media.extension in (".jpg", ".jpeg"):
        try:
            with open(path, "rb") as f:
                # Check Start of Image marker (FFD8)
                soi = f.read(2)
                if soi != b"\xff\xd8":
                    return None, "invalid JPEG: missing SOI marker (FFD8)"

                # Check End of Image marker (FFD9)
                if path.stat().st_size >= 4:  # Must have at least SOI + EOI
                    f.seek(-2, 2)
                    eoi = f.read()
                    if eoi != b"\xff\xd9":
                        return None, "truncated JPEG: missing EOI marker (FFD9)"
        except OSError as e:
            return None, f"cannot read JPEG markers: {e}"

    # PNG: Check signature and IEND chunk (2 reads, <1ms)
    elif media.extension == ".png":
        try:
            with open(path, "rb") as f:
                # Check PNG signature
                sig = f.read(8)
                if sig != b"\x89PNG\r\n\x1a\n":
                    return None, "invalid PNG: missing signature"

                # Check for IEND chunk at end (last 12 bytes)
                file_size = path.stat().st_size
                if file_size >= 20:  # Minimum valid PNG size
                    f.seek(-12, 2)
                    chunk_data = f.read(12)
                    if b"IEND" not in chunk_data:
                        return None, "truncated PNG: missing IEND chunk"
        except OSError as e:
            return None, f"cannot read PNG chunks: {e}"

    # SPECIAL CHECK: PSD color mode validation
    # Apple Photos only supports RGB PSD, not CMYK or other modes
    if media.extension == ".psd":
        psd_color_mode = media.metadata.get("psd_color_mode", "unknown")
        if psd_color_mode == "cmyk":
            return (
                None,
                "CMYK PSD not supported by Photos (requires RGB TIFF conversion)",
            )
        elif psd_color_mode in ("lab", "multichannel", "duotone"):
            return (
                None,
                f"{psd_color_mode.upper()} PSD not supported by Photos (requires RGB TIFF conversion)",
            )

    # COMPREHENSIVE CHECK: Actually decode the image (catches all corruption)
    # This is still fast (<10ms for most images) but thorough
    try:
        # First pass: verify headers
        with Image.open(path) as img:
            img.verify()

        # CRITICAL: Second pass - actually decode pixel data
        # Must reopen because verify() invalidates the image!
        with Image.open(path) as img:
            img.load()  # Force full decode - catches truncation

            # Sanity check dimensions
            width, height = img.size
            if width <= 0 or height <= 0:
                return None, "invalid image dimensions"

    except Image.DecompressionBombError as e:
        # Image is larger than Pillow's configured safety limit; tell the
        # user how to raise/disable it rather than treating it as corrupt.
        max_pixels = Image.MAX_IMAGE_PIXELS
        if max_pixels:
            return (
                None,
                f"image exceeds Pillow pixel limit ({max_pixels} pixels): {e}. Set --max-image-pixels none or SMART_MEDIA_MANAGER_MAX_IMAGE_PIXELS=none to disable.",
            )
        return (
            None,
            f"image exceeds Pillow pixel limit: {e}. Set --max-image-pixels none or SMART_MEDIA_MANAGER_MAX_IMAGE_PIXELS=none to disable.",
        )
    except (OSError, SyntaxError, ValueError) as e:
        error_msg = str(e).lower()

        # Classify error type for clear messaging
        if "truncated" in error_msg:
            return None, f"truncated or corrupt image data: {e}"
        elif "cannot identify" in error_msg:
            return None, f"invalid image format: {e}"
        else:
            return None, f"image corruption detected: {e}"

    return media, None
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
def refine_video_media(media: MediaFile, skip_compatibility_check: bool = False) -> tuple[Optional[MediaFile], Optional[str]]:
    """
    Validate video file compatibility with Apple Photos.

    Checks:
    - Video codec and codec tag (Dolby Vision, avc3/hev1, 10-bit)
    - Audio codec compatibility (FLAC, Opus, DTS, etc.)
    - Audio sample rate (must be standard rate)
    - Audio channel configuration

    Args:
        media: MediaFile to validate
        skip_compatibility_check: If True, skip all validation (for testing)

    Returns:
        (media, None) when compatible — including when ffprobe is not
        installed, in which case validation is skipped — or (None, reason)
        describing the incompatibility.
    """
    # Skip all validation if flag is set (for format testing)
    if skip_compatibility_check:
        return media, None

    ffprobe_path = shutil.which("ffprobe")
    if not ffprobe_path:
        # Without ffprobe we cannot inspect streams; accept the file as-is.
        return media, None

    # Get BOTH video and audio stream info
    # Note: Don't fail if audio stream missing, just get what's available
    cmd = [
        ffprobe_path,
        "-v",
        "error",
        "-show_entries",
        "stream=codec_type,codec_name,codec_tag_string,width,height,duration,pix_fmt,profile,sample_rate,channels,channel_layout",
        "-of",
        "default=noprint_wrappers=1",
        str(media.source),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        return None, "video validation failed"

    output = result.stdout.strip()
    output_lower = output.lower()
    # Preserve the raw ffprobe dump for later diagnostics/reporting.
    media.metadata["ffprobe_info"] = output

    # === VIDEO STREAM VALIDATION ===

    # CRITICAL: Check for incompatible codec tags
    # Apple requires parameter sets in container (stsd), not in-stream
    # Look for codec_tag_string field specifically to avoid false positives
    # NOTE(review): only the FIRST codec_tag_string line is used — presumably
    # the first (video) stream; confirm for multi-stream containers.
    codec_tag_string = ""
    for line in output.split("\n"):
        if "codec_tag_string=" in line.lower():
            codec_tag_string = line.split("=")[1].strip().lower()
            break

    incompatible_tags = {
        "avc3": "H.264 with in-stream parameters (avc3) not compatible; requires avc1",
        "hev1": "HEVC with in-stream parameters (hev1) not compatible; requires hvc1",
        "dvhe": "Dolby Vision with in-stream parameters (dvhe) not compatible; requires dvh1",
    }

    for tag, error_msg in incompatible_tags.items():
        if tag in codec_tag_string:
            return None, error_msg

    # Check for Dolby Vision (even dvh1 may have import issues)
    # Only check codec tag, not entire output (to avoid false positives)
    if any(tag in codec_tag_string for tag in ["dvh1", "dvav", "dva1"]):
        return (
            None,
            "Dolby Vision HEVC not compatible with Photos (requires standard HEVC transcode)",
        )

    # Also check for "dolby" in entire output as a backup check
    if "dolby" in output_lower and "vision" in output_lower:
        return (
            None,
            "Dolby Vision HEVC not compatible with Photos (requires standard HEVC transcode)",
        )

    # Check for 10-bit color depth
    # ("10le"/"10be" match pix_fmt suffixes such as yuv420p10le)
    if "10le" in output_lower or "10be" in output_lower:
        return (
            None,
            "10-bit color depth not fully compatible with Photos (requires 8-bit transcode)",
        )

    # === AUDIO STREAM VALIDATION ===

    audio_codec_value = (media.audio_codec or "").lower()
    if audio_codec_value:
        unsupported_audio = {
            "flac": "FLAC audio not supported by Photos (requires AAC transcode)",
            "opus": "Opus audio not supported by Photos (requires AAC transcode)",
            "dts": "DTS audio not supported by Photos (requires AC-3/EAC-3 transcode)",
            "dts-hd": "DTS-HD audio not supported by Photos (requires AC-3/EAC-3 transcode)",
            "truehd": "Dolby TrueHD audio not supported by Photos (requires AC-3/EAC-3 transcode)",
            "vorbis": "Vorbis audio not supported by Photos (requires AAC transcode)",
        }

        for unsupported_codec, error_msg in unsupported_audio.items():
            if unsupported_codec in audio_codec_value:
                return None, error_msg

    sample_rate = media.audio_sample_rate
    if sample_rate is None:
        # Fall back to parsing the ffprobe output: track which stream section
        # we are in and take the first sample_rate that belongs to an audio
        # stream (video streams also emit a sample_rate=N/A line).
        current_stream_type = None
        for line in output.split("\n"):
            lower = line.lower()
            if lower.startswith("codec_type="):
                current_stream_type = lower.split("=", 1)[1].strip()
            elif current_stream_type == "audio" and lower.startswith("sample_rate="):
                try:
                    sample_rate = int(lower.split("=", 1)[1].strip())
                except (ValueError, IndexError):
                    sample_rate = None
                break

    if sample_rate:
        standard_rates = {
            8000,
            11025,
            12000,
            16000,
            22050,
            24000,
            32000,
            44100,
            48000,
            88200,
            96000,
            176400,
            192000,
        }

        if sample_rate not in standard_rates:
            return None, f"Unsupported audio sample rate {sample_rate} Hz (requires resampling to 48000 Hz)"

    return media, None
|
|
1247
|
+
|
|
1248
|
+
|
|
1249
|
+
def run_command_with_progress(command: list[str], message: str, env: Optional[dict[str, str]] = None) -> None:
    """Run *command*, displaying an indeterminate progress bar on stdout.

    Combined stdout+stderr is captured to a temporary log file (not a pipe,
    so the child can never block on a full pipe buffer while we poll). On
    failure the last ~4000 characters of output are logged and a
    RuntimeError is raised. The temp file is always removed.

    Args:
        command: argv list for the subprocess.
        message: label shown next to the progress bar.
        env: environment for the child; defaults to a copy of os.environ.

    Raises:
        RuntimeError: when the command exits with a non-zero status.
    """
    bar_length = 28
    start = time.time()
    fd, tmp_name = tempfile.mkstemp(prefix="smm_cmd_", suffix=".log")
    os.close(fd)
    capture_path = Path(tmp_name)
    try:
        with capture_path.open("w", encoding="utf-8") as capture_writer:
            with subprocess.Popen(
                command,
                stdout=capture_writer,
                stderr=subprocess.STDOUT,
                text=True,
                env=env or os.environ.copy(),
            ) as proc:
                while True:
                    ret = proc.poll()
                    elapsed = time.time() - start
                    # Indeterminate bar: fill position cycles with elapsed time.
                    progress = (elapsed % bar_length) / (bar_length - 1)
                    filled = int(progress * bar_length)
                    bar = "#" * filled + "-" * (bar_length - filled)
                    sys.stdout.write(f"\r{message} [{bar}]")
                    sys.stdout.flush()
                    if ret is not None:
                        break
                    time.sleep(0.2)
                # Erase the progress line before any further output.
                sys.stdout.write("\r" + " " * (len(message) + bar_length + 3) + "\r")
                sys.stdout.flush()
                if proc.returncode != 0:
                    output_tail = ""
                    try:
                        with capture_path.open("r", encoding="utf-8") as capture_reader:
                            data = capture_reader.read()
                            output_tail = data[-4000:].strip()
                    except Exception:
                        output_tail = "(failed to read command output)"
                    error_message = f"Command '{command[0]}' failed with exit code {proc.returncode}."
                    if output_tail:
                        LOG.error("%s Output:\n%s", error_message, output_tail)
                    raise RuntimeError(error_message)
    finally:
        with suppress(OSError):
            capture_path.unlink()
|
|
1292
|
+
|
|
1293
|
+
|
|
1294
|
+
def ensure_homebrew() -> str:
    """Return the path to the brew binary, installing Homebrew if needed.

    The resolved path is cached in the module-level _BREW_PATH_CACHE. When a
    fresh install is performed, the brew directory is prepended to this
    process's PATH so subsequent shutil.which calls succeed.

    Raises:
        RuntimeError: if installation finishes but brew still cannot be found.
    """
    global _BREW_PATH_CACHE
    if _BREW_PATH_CACHE and Path(_BREW_PATH_CACHE).exists():
        return _BREW_PATH_CACHE
    brew_path = shutil.which("brew")
    if brew_path:
        _BREW_PATH_CACHE = brew_path
        return brew_path
    # Official non-interactive Homebrew installer.
    install_cmd = [
        "/bin/bash",
        "-lc",
        'NONINTERACTIVE=1 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"',
    ]
    run_command_with_progress(install_cmd, "Installing Homebrew")
    # Standard install locations (Apple Silicon first, then Intel).
    possible_paths = ["/opt/homebrew/bin/brew", "/usr/local/bin/brew"]
    for candidate in possible_paths:
        if Path(candidate).exists():
            os.environ["PATH"] = f"{Path(candidate).parent}:{os.environ.get('PATH', '')}"
            _BREW_PATH_CACHE = str(Path(candidate))
            return _BREW_PATH_CACHE
    brew_path = shutil.which("brew")
    if not brew_path:
        raise RuntimeError("Homebrew installation succeeded but brew binary not found in PATH.")
    _BREW_PATH_CACHE = brew_path
    return brew_path
|
|
1319
|
+
|
|
1320
|
+
|
|
1321
|
+
def brew_package_installed(brew_path: str, package: str) -> bool:
    """Return True if `brew list <package>` exits successfully."""
    completed = subprocess.run(
        [brew_path, "list", package],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return completed.returncode == 0
|
|
1325
|
+
|
|
1326
|
+
|
|
1327
|
+
def ensure_brew_package(brew_path: str, package: str) -> None:
    """Install *package* via Homebrew unless it is already present.

    Raises:
        RuntimeError: when the brew install command fails.
    """
    # Guard clause: already installed — deliberately do NOT upgrade, to
    # avoid repeated downloads.
    if brew_package_installed(brew_path, package):
        LOG.debug("Package %s already installed; skipping upgrade to avoid repeated downloads.", package)
        return
    try:
        run_command_with_progress([brew_path, "install", "--quiet", package], f"Installing {package}")
    except RuntimeError as exc:  # pragma: no cover - depends on user env
        raise RuntimeError(f"Failed to install {package} via Homebrew. Install it manually (brew install {package}) or rerun with --skip-bootstrap.") from exc
|
|
1335
|
+
|
|
1336
|
+
|
|
1337
|
+
def brew_cask_installed(brew_path: str, cask: str) -> bool:
    """Return True if `brew list --cask <cask>` exits successfully."""
    completed = subprocess.run(
        [brew_path, "list", "--cask", cask],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return completed.returncode == 0
|
|
1341
|
+
|
|
1342
|
+
|
|
1343
|
+
def ensure_brew_cask(brew_path: str, cask: str) -> None:
    """Install *cask* via Homebrew unless it is already present.

    Raises:
        RuntimeError: when the brew install command fails.
    """
    # Guard clause: already installed — deliberately do NOT upgrade, to
    # avoid repeated downloads.
    if brew_cask_installed(brew_path, cask):
        LOG.debug("Cask %s already installed; skipping upgrade to avoid repeated downloads.", cask)
        return
    try:
        run_command_with_progress([brew_path, "install", "--cask", "--quiet", cask], f"Installing {cask}")
    except RuntimeError as exc:  # pragma: no cover
        raise RuntimeError(f"Failed to install {cask} via Homebrew. Install it manually (brew install --cask {cask}) or rerun with --skip-bootstrap.") from exc
|
|
1351
|
+
|
|
1352
|
+
|
|
1353
|
+
def pip_package_installed(package: str) -> bool:
    """Return True if `pip show <package>` succeeds, caching positive results.

    Hits in the module-level _PIP_PACKAGE_CACHE short-circuit the subprocess
    call on repeat queries.
    """
    if package in _PIP_PACKAGE_CACHE:
        return True
    probe = subprocess.run(
        [sys.executable, "-m", "pip", "show", package],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if probe.returncode != 0:
        return False
    _PIP_PACKAGE_CACHE.add(package)
    return True
|
|
1362
|
+
|
|
1363
|
+
|
|
1364
|
+
def ensure_pip_package(package: str) -> None:
    """Install or upgrade a Python package with pip, tolerating failure.

    A missing package is installed and recorded in _PIP_PACKAGE_CACHE; an
    already-present package is still upgraded (unlike the brew helpers,
    which skip installed packages). Install failures are logged as warnings
    rather than raised, so files requiring the package can be skipped later.
    """
    try:
        if not pip_package_installed(package):
            run_command_with_progress(
                [sys.executable, "-m", "pip", "install", "--upgrade", package],
                f"Installing {package}",
            )
            _PIP_PACKAGE_CACHE.add(package)
        else:
            run_command_with_progress(
                [sys.executable, "-m", "pip", "install", "--upgrade", package],
                f"Updating {package}",
            )
    except RuntimeError as exc:
        # Pip install failed (likely compilation issues for packages with C extensions like rawpy)
        # Log warning and continue - files requiring this package will be skipped
        LOG.warning(
            "Failed to install Python package '%s': %s. Files requiring this package will be skipped. Try installing manually with 'pip install %s' or use --skip-bootstrap to bypass.",
            package,
            exc,
            package,
        )
|
|
1386
|
+
|
|
1387
|
+
|
|
1388
|
+
def ensure_system_dependencies() -> None:
    """Install every required Homebrew package, bootstrapping brew if needed."""
    brew = ensure_homebrew()
    for pkg in REQUIRED_BREW_PACKAGES.values():
        ensure_brew_package(brew, pkg)
|
|
1392
|
+
|
|
1393
|
+
|
|
1394
|
+
def copy_metadata_from_source(source: Path, target: Path) -> None:
    """Best-effort copy of all metadata from *source* to *target* via exiftool.

    Uses ExifTool's -TagsFromFile mode, whose built-in translation maps
    fields across EXIF/IPTC/XMP standards (dates, GPS, copyright, camera
    info, ...). Silently does nothing when exiftool is missing or either
    file does not exist; failures are logged at debug level only.
    """
    exiftool = find_executable("exiftool")
    if exiftool is None or not (source.exists() and target.exists()):
        return
    command = [
        exiftool,
        "-overwrite_original",
        "-TagsFromFile",
        str(source),
        "-all:all",  # Copy all writable tags preserving group structure
        "-unsafe",  # Include normally unsafe tags (needed for some JPEG repairs)
        str(target),
    ]
    try:
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except Exception as e:
        LOG.debug("Exiftool metadata copy failed for %s -> %s: %s", source, target, e)
    else:
        LOG.debug("Metadata copied from %s to %s via exiftool", source.name, target.name)
|
|
1422
|
+
|
|
1423
|
+
|
|
1424
|
+
def ensure_raw_dependencies_for_files(media_files: Iterable[MediaFile]) -> None:
    """Install the RAW dependency groups needed by any of the given files.

    Groups are derived from each file's detected extension and original
    suffix; nothing happens when no RAW formats are present.
    """
    groups: set[str] = set()
    for item in media_files:
        groups |= collect_raw_groups_from_extensions([item.extension, item.original_suffix])
    if groups:
        install_raw_dependency_groups(groups)
|
|
1431
|
+
|
|
1432
|
+
|
|
1433
|
+
def normalize_mime_value(mime: Optional[str]) -> Optional[str]:
    """Lower-case and trim a MIME string; None/blank input yields None."""
    if not mime:
        return None
    cleaned = mime.strip().lower()
    return cleaned if cleaned else None
|
|
1438
|
+
|
|
1439
|
+
|
|
1440
|
+
def is_textual_mime(mime: Optional[str]) -> bool:
    """Return True for text/* MIME types or any type in TEXTUAL_MIME_HINTS."""
    # Normalization inlined: trim + lower-case, treating None/blank as absent.
    cleaned = mime.strip().lower() if mime else ""
    if not cleaned:
        return False
    return cleaned.startswith("text/") or cleaned in TEXTUAL_MIME_HINTS
|
|
1447
|
+
|
|
1448
|
+
|
|
1449
|
+
def ensure_dot_extension(ext: Optional[str]) -> Optional[str]:
    """Normalize *ext* to dotted lower-case form (e.g. ".jpg"); None for blank."""
    if not ext:
        return None
    cleaned = ext.strip().lower()
    if not cleaned:
        return None
    return cleaned if cleaned.startswith(".") else f".{cleaned}"
|
|
1458
|
+
|
|
1459
|
+
|
|
1460
|
+
def canonicalize_extension(ext: Optional[str]) -> Optional[str]:
    """
    Canonicalize media file extension variants to preferred forms.

    Examples:
        .jfif, .jpeg → .jpg
        .tif → .tiff

    This ensures consistent extension naming regardless of which detection tool
    returned the extension. Only handles media files (image/video/RAW formats).
    Non-media files (HTML, text, etc.) are filtered out before this function is called.
    """
    if not ext:
        return None

    # Normalize to dotted lower-case form (logic inlined from
    # ensure_dot_extension; identical behavior).
    dotted = ext.strip().lower()
    if not dotted:
        return None
    if not dotted.startswith("."):
        dotted = f".{dotted}"

    # Canonical extension mappings for MEDIA FILES ONLY (variant → canonical).
    variant_map = {
        # JPEG variants → .jpg
        ".jfif": ".jpg",
        ".jpeg": ".jpg",
        ".jpe": ".jpg",
        # TIFF variants → .tiff
        ".tif": ".tiff",
        # Add more media format variants as needed based on detection tool outputs
    }

    return variant_map.get(dotted, dotted)
|
|
1493
|
+
|
|
1494
|
+
|
|
1495
|
+
def kind_from_mime(mime: Optional[str]) -> Optional[str]:
    """Map a MIME type to a coarse media kind: "image", "video" or "audio".

    Returns None for None/blank input or for any other MIME family.
    """
    # Normalization inlined: trim + lower-case, treating None/blank as absent.
    cleaned = mime.strip().lower() if mime else ""
    if not cleaned:
        return None
    for prefix, kind in (("image/", "image"), ("video/", "video"), ("audio/", "audio")):
        if cleaned.startswith(prefix):
            return kind
    return None
|
|
1506
|
+
|
|
1507
|
+
|
|
1508
|
+
def kind_from_extension(ext: Optional[str]) -> Optional[str]:
    """Classify an extension as "raw", "image" or "video"; None when unknown.

    RAW extensions take precedence, then image, then video.
    """
    bare = normalize_extension(ext)
    if not bare:
        return None
    dotted = ensure_dot_extension(bare)
    if dotted and dotted.lower() in RAW_EXTENSION_TO_GROUPS:
        return "raw"
    if dotted in COMPATIBLE_IMAGE_EXTENSIONS or bare in ALL_IMAGE_EXTENSIONS:
        return "image"
    if dotted in VIDEO_EXTENSION_MAP.values():
        return "video"
    return None
|
|
1520
|
+
|
|
1521
|
+
|
|
1522
|
+
def kind_from_description(description: Optional[str]) -> Optional[str]:
    """Guess a media kind from a tool's free-form description string.

    Image keywords are checked first (but "disk image" is explicitly
    excluded), then video, audio and camera-RAW keywords. Matching is plain
    substring search, so it is a best-effort hint only.
    """
    if not description:
        return None
    text = description.lower()

    image_words = ("image", "jpeg", "jpg", "png", "photo", "bitmap")
    if "disk image" not in text and any(word in text for word in image_words):
        return "image"

    video_words = ("video", "movie", "mpeg", "quicktime", "mp4", "h264", "h.264")
    if any(word in text for word in video_words):
        return "video"

    audio_words = ("audio", "sound", "mp3", "aac", "alac")
    if any(word in text for word in audio_words):
        return "audio"

    raw_words = ("raw", "cr2", "cr3", "nef", "arw", "raf", "orf", "rw2", "dng", "iiq", "3fr", "x3f")
    if any(word in text for word in raw_words):
        return "raw"

    return None
|
|
1551
|
+
|
|
1552
|
+
|
|
1553
|
+
def extension_from_mime(mime: Optional[str]) -> Optional[str]:
    """Derive a dotted extension for a MIME type.

    Consults the image map, then the video map, then falls back to
    mimetypes.guess_extension; the result is normalized to dotted form.
    """
    cleaned = normalize_mime_value(mime)
    if not cleaned:
        return None
    candidate = (
        IMAGE_MIME_EXTENSION_MAP.get(cleaned)
        or VIDEO_MIME_EXTENSION_MAP.get(cleaned)
        or mimetypes.guess_extension(cleaned)
    )
    return ensure_dot_extension(candidate)
|
|
1563
|
+
|
|
1564
|
+
|
|
1565
|
+
def extension_from_description(description: Optional[str]) -> Optional[str]:
    """Guess a dotted file extension from a free-form format description.

    Keyword groups are tested in a fixed order; the first group with any
    keyword present in the (lower-cased) description wins. Returns None when
    nothing matches.
    """
    if not description:
        return None
    text = description.lower()
    keyword_groups = (
        (".jpg", ("jpeg", "jpg")),
        (".png", ("png",)),
        (".gif", ("gif",)),
        (".bmp", ("bitmap", "bmp")),
        (".tiff", ("tiff", "tif")),
        (".heic", ("heic", "heif")),
        (".mp4", ("mp4", "mpeg-4", "h.264", "h264")),
        (".mov", ("quicktime", "mov")),
        (".m4v", ("m4v",)),
        (".webm", ("webm",)),
        (".avi", ("avi",)),
        (".mkv", ("matroska", "mkv")),
    )
    for candidate_ext, keywords in keyword_groups:
        if any(keyword in text for keyword in keywords):
            return candidate_ext
    return None
|
|
1587
|
+
|
|
1588
|
+
|
|
1589
|
+
def is_supported_video_codec(codec: Optional[str]) -> bool:
    """Return True when *codec* (case-insensitive) is in COMPATIBLE_VIDEO_CODECS."""
    if not codec:
        return False
    return codec.lower() in COMPATIBLE_VIDEO_CODECS
|
|
1594
|
+
|
|
1595
|
+
|
|
1596
|
+
def choose_vote_by_priority(
    votes: Iterable[FormatVote],
    predicate: Callable[[FormatVote], bool],
) -> Optional[FormatVote]:
    """Return the first vote satisfying *predicate*, in TOOL_PRIORITY order.

    Tools earlier in TOOL_PRIORITY win; within a single tool, the earliest
    matching vote wins. Returns None when nothing matches.
    """
    # Materialize the votes once: *votes* is typed as Iterable, and the
    # previous nested loop re-iterated it for every TOOL_PRIORITY entry,
    # silently exhausting one-shot iterators/generators after the first pass.
    vote_list = list(votes)
    for tool in TOOL_PRIORITY:
        for vote in vote_list:
            if vote.tool == tool and predicate(vote):
                return vote
    return None
|
|
1605
|
+
|
|
1606
|
+
|
|
1607
|
+
def select_consensus_vote(votes: list[FormatVote]) -> Optional[FormatVote]:
    """Pick the single vote that best represents agreement between detectors.

    Resolution order: (1) weighted consensus on normalized MIME types,
    (2) weighted consensus on normalized extensions, (3) the heaviest
    individual vote with ties broken by tool rank.  Returns None when no
    vote carries any usable signal.
    """
    # A vote is usable only if its tool succeeded and it reported at least
    # one of: MIME, extension, description.
    valid_votes = [vote for vote in votes if not vote.error and (vote.mime or vote.extension or vote.description)]
    if not valid_votes:
        return None

    # Round 1: accumulate per-MIME weights, then return a vote that backs
    # one of the top-weighted MIME types (tool priority breaks ties).
    mime_weights: dict[str, float] = {}
    for vote in valid_votes:
        mime_val = normalize_mime_value(vote.mime)
        if mime_val:
            mime_weights[mime_val] = mime_weights.get(mime_val, 0.0) + vote_weight(vote)
    if mime_weights:
        top_weight = max(mime_weights.values())
        # isclose() treats weights within float noise of the max as tied.
        top_mimes = {mime for mime, weight in mime_weights.items() if math.isclose(weight, top_weight, rel_tol=1e-9, abs_tol=1e-9)}
        choice = choose_vote_by_priority(valid_votes, lambda v: normalize_mime_value(v.mime) in top_mimes)
        if choice:
            return choice

    # Round 2: identical scheme, keyed on dotted extensions instead.
    ext_weights: dict[str, float] = {}
    for vote in valid_votes:
        ext_val = ensure_dot_extension(vote.extension)
        if ext_val:
            ext_weights[ext_val] = ext_weights.get(ext_val, 0.0) + vote_weight(vote)
    if ext_weights:
        top_weight = max(ext_weights.values())
        top_exts = {ext for ext, weight in ext_weights.items() if math.isclose(weight, top_weight, rel_tol=1e-9, abs_tol=1e-9)}
        choice = choose_vote_by_priority(valid_votes, lambda v: ensure_dot_extension(v.extension) in top_exts)
        if choice:
            return choice

    # Fallback: heaviest single vote; lower tool_rank wins on equal weight.
    # (valid_votes is non-empty here, so default=None is never used.)
    return max(
        valid_votes,
        key=lambda v: (vote_weight(v), -tool_rank(v.tool)),
        default=None,
    )
|
|
1641
|
+
|
|
1642
|
+
|
|
1643
|
+
def determine_media_kind(votes: list[FormatVote], consensus: Optional[FormatVote]) -> Optional[str]:
    """Resolve a media kind (e.g. image/video) from the detector votes.

    Weighted majority across every successful vote; the consensus vote wins
    any tie it participates in, otherwise tool priority decides.  Falls back
    to the consensus vote's own inferred kind, then None.
    """

    def inferred_kind(vote: FormatVote) -> Optional[str]:
        # Uniform precedence: explicit kind, then MIME, extension, description.
        return vote.kind or kind_from_mime(vote.mime) or kind_from_extension(vote.extension) or kind_from_description(vote.description)

    kind_weights: dict[str, float] = {}
    candidate_votes: list[FormatVote] = []
    for vote in votes:
        if vote.error:
            continue
        kind = inferred_kind(vote)
        if kind:
            kind_weights[kind] = kind_weights.get(kind, 0.0) + vote_weight(vote)
            candidate_votes.append(vote)

    if kind_weights:
        heaviest = max(kind_weights.values())
        # Weights within float noise of the maximum count as tied leaders.
        leading = {kind for kind, weight in kind_weights.items() if math.isclose(weight, heaviest, rel_tol=1e-9, abs_tol=1e-9)}
        if consensus:
            consensus_kind = inferred_kind(consensus)
            if consensus_kind and consensus_kind in leading:
                return consensus_kind
        winner = choose_vote_by_priority(
            candidate_votes,
            lambda v: inferred_kind(v) in leading,
        )
        if winner:
            return inferred_kind(winner)

    return inferred_kind(consensus) if consensus else None
|
|
1672
|
+
|
|
1673
|
+
|
|
1674
|
+
def votes_error_summary(votes: list[FormatVote]) -> str:
    """Summarize per-tool failures, or a generic message when none errored."""
    failures = [f"{vote.tool}: {vote.error}" for vote in votes if vote.error]
    if not failures:
        return "detectors could not agree on a media format"
    return "; ".join(failures)
|
|
1679
|
+
|
|
1680
|
+
|
|
1681
|
+
def collect_format_votes(path: Path, puremagic_signature: Optional[Signature] = None) -> list[FormatVote]:
    """Run every detector against *path* and gather their votes in order.

    An already-computed puremagic signature may be supplied to avoid
    re-reading the file.
    """
    detectors = (
        classify_with_libmagic(path),
        classify_with_puremagic(path, puremagic_signature),
        classify_with_pyfsig(path),
        classify_with_binwalk(path),
    )
    return list(detectors)
|
|
1688
|
+
|
|
1689
|
+
|
|
1690
|
+
def classify_with_libmagic(path: Path) -> FormatVote:
    """Classify *path* with libmagic, returning a FormatVote.

    Lazily creates two module-level Magic instances (one MIME, one textual
    description) so the libmagic database is loaded at most once per run.
    Any failure — missing library, unreadable file — is reported via the
    vote's ``error`` field rather than raised.
    """
    if magic is None:
        # Optional dependency; bootstrap may not have installed it yet.
        return FormatVote(tool="libmagic", error="libmagic not yet installed")
    global _MAGIC_MIME, _MAGIC_DESC
    try:
        if _MAGIC_MIME is None:
            _MAGIC_MIME = magic.Magic(mime=True)
        if _MAGIC_DESC is None:
            _MAGIC_DESC = magic.Magic()
        raw_mime = _MAGIC_MIME.from_file(str(path)) if _MAGIC_MIME else None
        mime = normalize_mime_value(raw_mime)
        description = _MAGIC_DESC.from_file(str(path)) if _MAGIC_DESC else None
        # Derive extension/kind from MIME first, falling back to the
        # human-readable description.
        extension = extension_from_mime(mime) or extension_from_description(description)
        kind = kind_from_mime(mime) or kind_from_description(description)
        if not mime and not description:
            return FormatVote(tool="libmagic", error="no match")
        return FormatVote(
            tool="libmagic",
            mime=mime,
            description=description,
            extension=extension,
            kind=kind,
        )
    except Exception as exc:  # pragma: no cover - runtime safety
        return FormatVote(tool="libmagic", error=str(exc))
|
|
1715
|
+
|
|
1716
|
+
|
|
1717
|
+
def classify_with_puremagic(path: Path, signature: Optional[Signature] = None) -> FormatVote:
    """Turn a puremagic signature (computed on demand) into a FormatVote."""
    sig = signature if signature is not None else safe_puremagic_guess(path)
    if sig.is_empty():
        return FormatVote(tool="puremagic", error="no match")

    extension = None
    if sig.extension:
        # Prefer canonical image/video spellings before the raw extension.
        extension = (
            canonical_image_extension(sig.extension)
            or canonical_video_extension(sig.extension)
            or ensure_dot_extension(sig.extension)
        )

    mime = normalize_mime_value(sig.mime)
    # No prose description is available; reuse the raw MIME when present.
    description = sig.mime if sig.mime else None
    return FormatVote(
        tool="puremagic",
        mime=mime,
        extension=extension,
        description=description,
        kind=kind_from_mime(mime) or kind_from_extension(extension),
    )
|
|
1739
|
+
|
|
1740
|
+
|
|
1741
|
+
def classify_with_pyfsig(path: Path) -> FormatVote:
    """Classify *path* via pyfsig's file-signature tables."""
    try:
        matches = pyfsig_interface.find_matches_for_file_path(str(path))
    except Exception as exc:  # pragma: no cover - runtime safety
        return FormatVote(tool="pyfsig", error=str(exc))
    if not matches:
        return FormatVote(tool="pyfsig", error="no signature match")

    # Take the first reported match.
    best = matches[0]
    extension = ensure_dot_extension(best.file_extension)
    description = best.description
    return FormatVote(
        tool="pyfsig",
        extension=extension,
        description=description,
        kind=kind_from_extension(extension) or kind_from_description(description),
    )
|
|
1758
|
+
|
|
1759
|
+
|
|
1760
|
+
def classify_with_binwalk(path: Path) -> FormatVote:
    """Classify *path* by running ``binwalk --signature`` and parsing its
    tabular output; failures are reported via the vote's ``error`` field."""
    if not BINWALK_EXECUTABLE:
        return FormatVote(tool="binwalk", error="binwalk executable not found")
    try:
        # --length 0 means no scan-length limit.
        result = subprocess.run(
            [BINWALK_EXECUTABLE, "--signature", "--length", "0", str(path)],
            capture_output=True,
            text=True,
            check=False,
        )
    except Exception as exc:  # pragma: no cover - runtime safety
        return FormatVote(tool="binwalk", error=str(exc))
    if result.returncode not in (0, 1):  # binwalk returns 1 when no signatures match
        return FormatVote(
            tool="binwalk",
            error=result.stderr.strip() or f"exit code {result.returncode}",
        )
    description = None
    # Scan the signature table: skip blank lines, the DECIMAL... header row
    # and "--" separators, then take the description column (third field)
    # of the first data row.
    for line in result.stdout.splitlines():
        stripped = line.strip()
        if not stripped or stripped.upper().startswith("DECIMAL") or stripped.startswith("--"):
            continue
        parts = stripped.split(None, 2)
        if len(parts) == 3:
            description = parts[2]
            break
    if not description:
        return FormatVote(tool="binwalk", error="no signature match")
    extension = extension_from_description(description)
    kind = kind_from_description(description) or kind_from_extension(extension)
    return FormatVote(
        tool="binwalk",
        description=description,
        extension=extension,
        kind=kind,
    )
|
|
1796
|
+
|
|
1797
|
+
|
|
1798
|
+
def sanitize_path_string(path_str: str) -> str:
    """Clean and normalize path string, handling unicode and control characters.

    Fix: the function-level ``import re`` / ``import unicodedata`` were
    redundant — both modules are already imported at module level — so they
    have been removed.

    Args:
        path_str: Raw path string that may contain unicode, diacritics, or control characters

    Returns:
        Sanitized path string with normalized unicode and stripped control characters
    """
    # Remove leading/trailing whitespace
    cleaned = path_str.strip()

    # Strip control characters (U+0000 to U+001F and U+007F to U+009F)
    # but preserve path separators and valid unicode characters
    control_chars = "".join(chr(i) for i in range(0, 32)) + "".join(chr(i) for i in range(127, 160))
    cleaned = cleaned.translate(str.maketrans("", "", control_chars))

    # Normalize unicode to NFC (Canonical Decomposition, followed by Canonical Composition)
    # This handles diacritics and other language-specific characters consistently
    try:
        cleaned = unicodedata.normalize("NFC", cleaned)
    except (ValueError, TypeError) as e:
        # If normalization fails, try NFKC (compatibility normalization)
        try:
            cleaned = unicodedata.normalize("NFKC", cleaned)
        except (ValueError, TypeError):
            # If both fail, continue with the cleaned string
            LOG.warning(f"Unicode normalization failed for path: {e}")

    # Remove any remaining invalid or problematic characters for file paths
    # Keep: letters, digits, spaces, and common path characters (. - _ / \\ :)
    # This is more permissive to allow international file names
    cleaned = re.sub(r'[<>"|?*\x00-\x1f\x7f-\x9f]', "", cleaned)

    # Final strip to remove any whitespace that may have been exposed
    cleaned = cleaned.strip()

    return cleaned
|
|
1839
|
+
|
|
1840
|
+
|
|
1841
|
+
def validate_path_argument(path_str: str) -> Path:
    """Validate and convert path string to Path object with comprehensive error checking.

    Used as an argparse ``type=`` callable, so every failure is reported as
    ``argparse.ArgumentTypeError`` (argparse converts that into a clean CLI
    error message).

    Args:
        path_str: Path string from command line argument

    Returns:
        Validated Path object (expanded and resolved to an absolute path)

    Raises:
        argparse.ArgumentTypeError: If path is invalid, doesn't exist, is empty,
            has permission issues, or is on an unmounted volume
    """
    # Sanitize the path string
    cleaned_str = sanitize_path_string(path_str)

    if not cleaned_str:
        raise argparse.ArgumentTypeError("Path cannot be empty after sanitization")

    # Convert to Path object
    try:
        path = Path(cleaned_str).expanduser().resolve()
    except (ValueError, RuntimeError, OSError) as e:
        raise argparse.ArgumentTypeError(f"Invalid path: {e}")

    # Check if path exists
    if not path.exists():
        # Check if it's on an unmounted volume or network path
        parent = path.parent
        if parent.exists():
            # Parent exists but file/dir doesn't - likely deleted/moved
            raise argparse.ArgumentTypeError(f"Path does not exist: {path}")
        else:
            # Parent doesn't exist - might be unmounted volume
            raise argparse.ArgumentTypeError(f"Path does not exist (unmounted volume or network path?): {path}")

    # Check if we have read permissions
    try:
        # For directories, try to list contents
        if path.is_dir():
            try:
                next(path.iterdir(), None)
            except PermissionError:
                raise argparse.ArgumentTypeError(f"Permission denied: Cannot read directory {path}")
            except OSError as e:
                raise argparse.ArgumentTypeError(f"Cannot access directory {path}: {e}")
        # For files, try to open and read
        else:
            try:
                # Check if file is readable
                with path.open("rb") as f:
                    # Try to read first byte to check if file is accessible
                    f.read(1)
            except PermissionError:
                raise argparse.ArgumentTypeError(f"Permission denied: Cannot read file {path}")
            except OSError as e:
                # Could be corrupt, on unmounted volume, or other I/O error
                raise argparse.ArgumentTypeError(f"Cannot read file {path}: {e}")

        # Check if file is empty (warn but don't fail - might be intentional for testing)
        # NOTE(review): this stat() also runs for directories, not only files
        # — st_size == 0 for a directory would trigger the warning too.
        if path.stat().st_size == 0:
            # Note: We don't raise an error here because empty files might be intentional
            # The CLI will handle this later in the processing pipeline
            LOG.warning(f"File is empty: {path}")

    except argparse.ArgumentTypeError:
        # Re-raise our custom errors
        raise
    except Exception as e:
        # Catch any other unexpected errors
        raise argparse.ArgumentTypeError(f"Error validating path {path}: {e}")

    return path
|
|
1914
|
+
|
|
1915
|
+
|
|
1916
|
+
def check_write_permission(directory: Path, operation_name: str = "write") -> None:
    """Check if we have write permissions in the given directory.

    The check is empirical: a throwaway temporary file is created inside
    *directory* and written to, then deleted.

    Fix: removed the redundant function-level ``import tempfile`` — the
    module is already imported at the top of the file.

    Args:
        directory: Directory to check for write permissions
        operation_name: Description of the operation needing write access (for error messages)

    Raises:
        PermissionError: If directory is not writable with a clear error message
        OSError: If directory cannot be accessed for other reasons
    """
    if not directory.exists():
        raise OSError(f"Directory does not exist: {directory}")

    if not directory.is_dir():
        raise OSError(f"Path is not a directory: {directory}")

    # Try to create a temporary file to test write permissions
    try:
        with tempfile.NamedTemporaryFile(dir=directory, delete=True) as tmp:
            # Successfully created and can write
            tmp.write(b"test")
    except PermissionError:
        raise PermissionError(f"Permission denied: Cannot {operation_name} in directory {directory}\nPlease check that you have write permissions for this location.")
    except OSError as e:
        raise OSError(f"Cannot {operation_name} in directory {directory}: {e}")
|
|
1944
|
+
|
|
1945
|
+
|
|
1946
|
+
def parse_max_image_pixels(value: str) -> Optional[int]:
    """Parse the --max-image-pixels CLI value.

    Accepts a positive integer, or one of the disable keywords
    ('none', 'disable', 'disabled', 'off', '0') which yield None.
    """
    token = value.strip().lower()
    if token in {"none", "disable", "disabled", "off", "0"}:
        return None
    error_text = "max image pixels must be a positive integer or 'none' to disable"
    try:
        pixels = int(token)
    except ValueError as exc:
        raise argparse.ArgumentTypeError(error_text) from exc
    if pixels <= 0:
        raise argparse.ArgumentTypeError(error_text)
    return pixels
|
|
1957
|
+
|
|
1958
|
+
|
|
1959
|
+
def configure_pillow_max_image_pixels(max_image_pixels: Optional[int]) -> None:
    """Apply the decompression-bomb pixel limit to Pillow and log the result.

    None disables Pillow's protection entirely.
    """
    Image.MAX_IMAGE_PIXELS = max_image_pixels
    if max_image_pixels is not None:
        LOG.info("Pillow MAX_IMAGE_PIXELS set to %s", max_image_pixels)
    else:
        LOG.info("Pillow decompression-bomb protection disabled.")
|
|
1965
|
+
|
|
1966
|
+
|
|
1967
|
+
def parse_args() -> argparse.Namespace:
    """Build the CLI parser, parse sys.argv, and post-process the namespace.

    Post-processing: environment overrides for assume-yes and the Pillow
    pixel limit, a current-directory default for ``path``, and an implicit
    assume-yes when --copy is used.
    """
    parser = argparse.ArgumentParser(
        prog="smart-media-manager",
        description="Scan and import media into Apple Photos, fixing extensions and compatibility.",
        epilog="Examples:\n %(prog)s /path/to/media --recursive\n %(prog)s /path/to/image.jpg\n %(prog)s # scans current directory",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=None,  # Changed from Path.cwd() to allow special handling
        type=validate_path_argument,  # Use custom validation function
        metavar="PATH",
        help="Directory to scan (default: current directory) or path to a single file",
    )
    parser.add_argument(
        "--delete",
        action="store_true",
        help="Delete the temporary FOUND_MEDIA_FILES_<timestamp> folder after a successful import.",
    )
    parser.add_argument(
        "--recursive",
        action="store_true",
        help="Recursively scan the folder for media files.",
    )
    parser.add_argument(
        "--follow-symlinks",
        action="store_true",
        help="Follow symbolic links when scanning.",
    )
    parser.add_argument(
        "--skip-bootstrap",
        action="store_true",
        help="Skip automatic dependency installation (requires prerequisites already installed).",
    )
    parser.add_argument(
        "--skip-convert",
        action="store_true",
        help="Skip format conversion/transcoding. Files must already be Photos-compatible. Useful for testing raw compatibility.",
    )
    parser.add_argument(
        "--skip-compatibility-check",
        action="store_true",
        help="Skip all compatibility validation checks. ⚠️ WARNING: May cause Photos import errors! Use only for format testing.",
    )
    parser.add_argument(
        "--max-image-pixels",
        type=parse_max_image_pixels,
        default=MAX_IMAGE_PIXELS_UNSET,
        help="Set Pillow image pixel limit; use 'none' to disable (default: none).",
    )
    parser.add_argument(
        "--album",
        type=str,
        default="Smart Media Manager",
        help="Photos album name to import into (default: 'Smart Media Manager').",
    )
    parser.add_argument(
        "--skip-duplicate-check",
        action="store_true",
        default=False,
        help="Skip duplicate checking during import (faster but may import duplicates). Default: check for duplicates and prompt user.",
    )
    parser.add_argument(
        "--copy",
        dest="copy_mode",
        action="store_true",
        help="Copy files into staging instead of moving them (originals are left untouched).",
    )
    parser.add_argument(
        "-y",
        "--yes",
        "--assume-yes",
        dest="assume_yes",
        action="store_true",
        help="Skip confirmation prompt before scanning. Useful for automation and tests.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
        help="Show the smart-media-manager version and exit.",
    )

    args = parser.parse_args()

    # Environment override to avoid interactive prompt (CI/testing)
    if not args.assume_yes:
        env_assume = os.environ.get("SMART_MEDIA_MANAGER_ASSUME_YES")
        if env_assume and env_assume.strip().lower() not in {"0", "false", "no"}:
            args.assume_yes = True

    # MAX_IMAGE_PIXELS_UNSET is a sentinel distinguishing "flag not given"
    # from an explicit value, so the env var only applies when unset.
    if args.max_image_pixels is MAX_IMAGE_PIXELS_UNSET:
        env_max_pixels = os.environ.get("SMART_MEDIA_MANAGER_MAX_IMAGE_PIXELS")
        if env_max_pixels and env_max_pixels.strip():
            try:
                args.max_image_pixels = parse_max_image_pixels(env_max_pixels)
            except argparse.ArgumentTypeError as exc:
                parser.error(f"Invalid SMART_MEDIA_MANAGER_MAX_IMAGE_PIXELS: {exc}")
        else:
            args.max_image_pixels = None

    # Handle default path (current directory) if no path provided
    if args.path is None:
        args.path = Path.cwd()

    # In copy mode the user likely wants to keep originals; implicit yes to prompt if flag set
    if args.copy_mode:
        args.assume_yes = True

    return args
|
|
2078
|
+
|
|
2079
|
+
|
|
2080
|
+
def ensure_dependency(name: str) -> None:
    """Raise RuntimeError unless the executable *name* resolves on PATH."""
    if shutil.which(name) is not None:
        return
    raise RuntimeError(f"Required dependency '{name}' is not available on PATH.")
|
|
2083
|
+
|
|
2084
|
+
|
|
2085
|
+
def ffprobe(path: Path) -> Optional[dict[str, Any]]:
    """Return ffprobe's JSON description of *path*, or None on any failure.

    Failure covers a non-zero exit code as well as unparseable stdout.
    """
    command = [
        "ffprobe",
        "-v",
        "error",
        "-print_format",
        "json",
        "-show_streams",
        "-show_format",
        str(path),
    ]
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        check=False,
    )
    if completed.returncode != 0:
        return None
    try:
        return json.loads(completed.stdout)  # type: ignore[no-any-return]
    except json.JSONDecodeError:
        return None
|
|
2108
|
+
|
|
2109
|
+
|
|
2110
|
+
def extract_and_normalize_metadata(probe_data: dict[str, Any]) -> dict[str, Any]:
    """
    Extract metadata from ffprobe JSON and normalize field names to UUIDs.

    Extracts metadata from both format-level and stream-level tags, then uses
    the metadata registry to translate ffprobe field names to canonical UUIDs.

    Args:
        probe_data: FFprobe JSON output with 'format' and 'streams' keys

    Returns:
        Dictionary with UUID keys mapping to metadata values

    Example:
        >>> probe = {"format": {"tags": {"creation_time": "2024-01-15"}}}
        >>> metadata = extract_and_normalize_metadata(probe)
        >>> # Returns: {'3d4f8a9c-1e7b-5c3d-9a2f-4e8c1b7d3a9f-M': '2024-01-15'}
    """
    raw_metadata: dict[str, Any] = {}

    # Format-level tags (creation_time, artist, title, ...) win; keys are
    # lowered because ffprobe tag casing is inconsistent.
    for key, value in probe_data.get("format", {}).get("tags", {}).items():
        raw_metadata[key.lower()] = value

    # Stream-level tags fill only the gaps left by format-level ones.
    for stream in probe_data.get("streams", []):
        for key, value in stream.get("tags", {}).items():
            raw_metadata.setdefault(key.lower(), value)

    if not raw_metadata:
        return {}

    # Translate ffprobe field names into canonical registry UUIDs.
    normalized = metadata_registry.normalize_metadata_dict("ffprobe", raw_metadata)
    LOG.debug(f"Extracted and normalized {len(normalized)} metadata fields from ffprobe")
    return normalized
|
|
2158
|
+
|
|
2159
|
+
|
|
2160
|
+
def is_video_corrupt_or_truncated(path: Path) -> tuple[bool, Optional[str]]:
    """
    FAST corruption detection for video files (<1 second for most files).

    Strategy: Decode first 5 seconds with error detection enabled.
    This catches 99% of corruption while being very fast.

    For truncated files: The corruption usually manifests early when
    decoder hits missing/invalid data, even if file claims full duration.

    Returns:
        (is_corrupt, reason) — reason is None when the file looks healthy.
    """
    # Quick check: can ffprobe read the file?
    probe = ffprobe(path)
    if probe is None:
        return True, "ffprobe cannot read file"

    # Check for streams
    streams = probe.get("streams", [])
    if not streams:
        return True, "no streams found"

    # Check for video stream
    has_video = any(s.get("codec_type") == "video" for s in streams)
    if not has_video:
        return True, "no video stream found"

    # Check format info
    format_info = probe.get("format", {})
    if not format_info:
        return True, "no format information"

    # Check duration
    try:
        duration = float(format_info.get("duration", 0))
        if duration <= 0:
            return True, "invalid or missing duration"
    except (ValueError, TypeError):
        return True, "cannot parse duration"

    # FAST CHECK: Decode first 5 seconds with explode on errors
    # This is MUCH faster than full decode but catches most corruption
    # Timeout after 5 seconds to prevent hanging
    cmd = [
        "ffmpeg",
        "-v",
        "error",
        "-err_detect",
        "explode",  # Exit on first error
        "-t",
        "5",  # Only decode first 5 seconds
        "-i",
        str(path),
        "-vframes",
        "60",  # Max 60 frames (2.5s at 24fps)
        "-f",
        "null",
        "-",
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
    except subprocess.TimeoutExpired:
        return True, "validation timeout - likely corrupted or very slow codec"

    # CRITICAL: Check stderr REGARDLESS of exit code!
    # ffmpeg returns 0 even when it detects corruption
    stderr = result.stderr.lower() if result.stderr else ""

    # Substrings (lowercased) that mark decoder-reported damage; "truncat"
    # deliberately matches both "truncated" and "truncating".
    corruption_indicators = [
        "partial file",
        "invalid nal",
        "invalid data",
        "decoding error",
        "error splitting",
        "corrupt",
        "truncat",
        "moov atom not found",
        "incomplete",
        "unexpected end",
        "end of file",
        "premature end",
        "failed to decode",
        "invalid bitstream",
        "error decoding",
    ]

    for indicator in corruption_indicators:
        if indicator in stderr:
            return True, f"corruption detected: {stderr[:200]}"

    # Also check return code for fatal errors
    if result.returncode != 0:
        return True, f"decode failed: {stderr[:200]}"

    # ADDITIONAL CHECK: For longer videos, check near the end too
    # This catches truncation that doesn't manifest in first 5s
    if duration > 10:
        # Try to seek near end and decode a few frames
        seek_time = max(0, duration - 2)
        cmd_end = [
            "ffmpeg",
            "-v",
            "error",
            "-ss",
            str(seek_time),
            "-i",
            str(path),
            "-vframes",
            "5",
            "-f",
            "null",
            "-",
        ]

        try:
            result_end = subprocess.run(cmd_end, capture_output=True, text=True, timeout=3)
            stderr_end = result_end.stderr.lower() if result_end.stderr else ""

            for indicator in corruption_indicators:
                if indicator in stderr_end:
                    return True, f"truncated at end: {result_end.stderr[:150]}"
        except subprocess.TimeoutExpired:
            # End-check timeout is acceptable for very large files
            pass

    return False, None
|
|
2285
|
+
|
|
2286
|
+
|
|
2287
|
+
def extract_container(format_name: str) -> str:
    """Return the primary container from a comma-separated format_name
    (e.g. "mov,mp4,m4a,3gp" -> "mov"), lowercased and stripped."""
    primary, _, _ = format_name.partition(",")
    return primary.strip().lower()
|
|
2289
|
+
|
|
2290
|
+
|
|
2291
|
+
def is_skippable_file(path: Path) -> Optional[str]:
    """Return a human-readable reason to skip *path*, or None to keep it.

    Skips empty files, unreadable files, and anything that looks like plain
    text rather than binary media.
    """
    try:
        if path.stat().st_size == 0:
            return "file is empty"
    except OSError as exc:
        return f"stat failed: {exc.strerror or exc.args[0]}"

    try:
        with path.open("rb") as handle:
            # Best-effort read-ahead hint; posix_fadvise is missing on some
            # platforms (e.g. macOS), hence the suppress().
            with suppress(AttributeError, OSError):
                os.posix_fadvise(handle.fileno(), 0, 0, os.POSIX_FADV_RANDOM)  # type: ignore[attr-defined]
            handle.read(1)
    except PermissionError as exc:
        return f"permission denied: {exc.filename or path}"
    except OSError as exc:
        return f"io error: {exc.strerror or exc.args[0]}"

    # Cheap extension-based text detection first, then the generic check.
    suffix = path.suffix.lower()
    if suffix in TEXT_ONLY_HINT_EXTENSIONS and looks_like_text_file(path):
        return "text file"

    try:
        if not is_binary_file(str(path)):
            return "text file"
    except Exception as exc:  # noqa: BLE001
        return f"binary check failed: {exc}"

    return None
|
|
2319
|
+
|
|
2320
|
+
|
|
2321
|
+
def detect_media(path: Path, skip_compatibility_check: bool = False) -> tuple[Optional[MediaFile], Optional[str]]:
    """Classify *path* as an importable media file.

    Returns ``(media, None)`` on success or ``(None, reason)`` when the file
    is rejected (non-media, corrupt, unidentified, or unsupported). The UUID
    registry (JSON) is the sole source of truth for whether a detected format
    is supported and which action applies; the legacy rule table is consulted
    only for metadata (rule_id, notes, extension preferences).

    Args:
        path: File to inspect.
        skip_compatibility_check: Forwarded to the image/video refinement
            steps to bypass their compatibility validation.
    """
    # Cheap magic-byte probes first; either may come back empty.
    filetype_signature = safe_filetype_guess(path)
    puremagic_signature = safe_puremagic_guess(path)
    signatures = [filetype_signature, puremagic_signature]

    if any(is_archive_signature(sig) for sig in signatures):
        return None, "non-media: archive file"

    if any(is_textual_mime(sig.mime) for sig in signatures):
        return None, "non-media: text file"

    votes = collect_format_votes(path, puremagic_signature)
    consensus = select_consensus_vote(votes)
    if not consensus:
        return None, votes_error_summary(votes)

    # UUID-based format detection for early filtering
    tool_results = {}
    for vote in votes:
        if vote.tool and not vote.error:
            # Collect tool outputs for UUID lookup (description preferred over MIME)
            if vote.description:
                tool_results[vote.tool] = vote.description
            elif vote.mime:
                tool_results[vote.tool] = vote.mime

    # Try UUID-based detection
    detected_uuid = format_registry.format_detection_result(tool_results) if tool_results else None
    uuid_compatible = format_registry.is_apple_photos_compatible(detected_uuid) if detected_uuid else None
    uuid_canonical_name = format_registry.get_canonical_name(detected_uuid) if detected_uuid else None

    # Register any tool outputs that lack a mapping to help expand the registry
    if tool_results:
        suffix = path.suffix.lower() if path.suffix else ""

        def infer_kind() -> str:
            # Best-effort kind guess used only when registering unknown mappings.
            if is_image_signature(Signature(extension=suffix)) or any(is_image_signature(sig) for sig in signatures):
                return "image"
            if is_video_signature(Signature(extension=suffix)) or any(is_video_signature(sig) for sig in signatures):
                return "video"
            return "container"

        for tool_name, token in tool_results.items():
            if not token:
                continue
            if format_registry.lookup_format_uuid(tool_name, token) is None:
                UNKNOWN_MAPPINGS.register(tool_name, token, infer_kind(), path)

    # Log UUID detection for debugging
    if detected_uuid:
        LOG.debug(f"UUID detection for {path.name}: uuid={detected_uuid}, canonical={uuid_canonical_name}, compatible={uuid_compatible}")

    detected_kind = determine_media_kind(votes, consensus)
    if detected_kind not in {"image", "video", "raw"}:
        reason = consensus.mime or consensus.description or votes_error_summary(votes)
        if reason:
            return None, f"non-media: {reason}"
        return None, "non-media: unidentified format"
    size_bytes = None
    try:
        size_bytes = path.stat().st_size
    except OSError:
        size_bytes = None

    suffix = path.suffix.lower() if path.suffix else ""

    # Animation state feeds the rule matcher (animated GIF/PNG/WebP differ).
    animated = False
    if suffix in {".gif"}:
        animated = is_animated_gif(path)
    elif suffix in {".png"}:
        animated = is_animated_png(path)
    elif suffix in {".webp"}:
        animated = is_animated_webp(path)

    psd_color_mode = get_psd_color_mode(path) if suffix == ".psd" else None
    if suffix == ".psd" and not psd_color_mode:
        psd_color_mode = "unknown"

    def vote_for(tool: str) -> Optional[FormatVote]:
        # First vote emitted by the named tool, if any.
        for vote in votes:
            if vote.tool == tool:
                return vote
        return None

    libmagic_vote = vote_for("libmagic")
    puremagic_vote = vote_for("puremagic")
    pyfsig_vote = vote_for("pyfsig")
    binwalk_vote = vote_for("binwalk")

    # Flatten each tool's evidence into the string lists match_rule() expects.
    libmagic_values = [val for val in (libmagic_vote.mime, libmagic_vote.description) if val] if libmagic_vote else []
    puremagic_values: list[str] = []
    if puremagic_vote:
        if puremagic_vote.mime:
            puremagic_values.append(puremagic_vote.mime)
        if puremagic_vote.extension:
            puremagic_values.append(puremagic_vote.extension)
            if puremagic_vote.extension.startswith("."):
                puremagic_values.append(puremagic_vote.extension.lstrip("."))
        if puremagic_vote.description:
            puremagic_values.append(puremagic_vote.description)
    pyfsig_values: list[str] = []
    if pyfsig_vote:
        if pyfsig_vote.description:
            pyfsig_values.append(pyfsig_vote.description)
        if pyfsig_vote.extension:
            pyfsig_values.append(pyfsig_vote.extension)
            if pyfsig_vote.extension.startswith("."):
                pyfsig_values.append(pyfsig_vote.extension.lstrip("."))
    binwalk_values = [binwalk_vote.description] if binwalk_vote and binwalk_vote.description else []

    video_codec = None
    audio_codec = None
    audio_channels = None
    audio_layout = None
    container = None
    ffprobe_tokens: list[str] = []
    # Format parameters for expanded UUID generation
    video_bit_depth = None
    video_pix_fmt = None
    video_profile = None
    audio_sample_rate = None
    audio_sample_fmt = None
    # Initialize UUID variables for all file types (not just videos)
    video_codec_uuid = None
    audio_codec_uuid = None

    if detected_kind == "video":
        # Check for corruption before further processing
        is_corrupt, corrupt_reason = is_video_corrupt_or_truncated(path)
        if is_corrupt:
            return None, f"corrupt or truncated video: {corrupt_reason}"

        probe = ffprobe(path)
        if not probe:
            return None, "video probe failed"

        # Extract and normalize metadata fields using UUID translation layer
        # This converts ffprobe field names (creation_time, artist, etc.) to UUIDs
        normalized_metadata = extract_and_normalize_metadata(probe)

        streams = probe.get("streams", [])
        format_info = probe.get("format", {})
        format_name = format_info.get("format_name", "").lower()
        if not format_name:
            return None, "unsupported video container"
        container = extract_container(format_name)
        # First video stream and first audio stream win; later streams ignored.
        for stream in streams:
            codec_type = stream.get("codec_type")
            if codec_type == "video" and not video_codec:
                video_codec = (stream.get("codec_name") or "").lower() or None
                # Extract format parameters for expanded UUID generation
                video_bit_depth = stream.get("bits_per_raw_sample")  # Bit depth (8, 10, 12, 16)
                if not video_bit_depth:
                    # Fallback: try bits_per_component when bits_per_raw_sample is absent
                    video_bit_depth = stream.get("bits_per_component")
                video_pix_fmt = stream.get("pix_fmt")  # Pixel format (yuv420p, yuv422p, etc.)
                video_profile = stream.get("profile")  # Profile (High, Main, Main 10, etc.)
            elif codec_type == "audio" and not audio_codec:
                audio_codec = (stream.get("codec_name") or "").lower() or None
                audio_channels = stream.get("channels")
                audio_layout = stream.get("channel_layout")
                # Extract audio format parameters; sample_rate arrives as a string
                sample_rate_val = stream.get("sample_rate")
                try:
                    audio_sample_rate = int(sample_rate_val) if sample_rate_val is not None else None
                except (TypeError, ValueError):
                    audio_sample_rate = None
                audio_sample_fmt = stream.get("sample_fmt")
        if container:
            ffprobe_tokens.append(f"container:{container}")
        if video_codec:
            ffprobe_tokens.append(f"video:{video_codec}")
        if audio_codec:
            ffprobe_tokens.append(f"audio:{audio_codec}")

        # Generate expanded UUID for video codec with format parameters
        # This provides granular format identification (e.g., H.264 8-bit vs 10-bit)
        # IMPORTANT: Use the translation layer to get the base codec UUID
        if video_codec:
            try:
                # Translate ffprobe codec name to base UUID using the unified translation layer
                base_codec_uuid = format_registry.lookup_format_uuid("ffprobe", video_codec)
                if base_codec_uuid:
                    # Extract the base UUID (everything before the type suffix)
                    # E.g., "b2e62c4a-6122-548c-9bfa-0fcf3613942a-V" → "b2e62c4a-6122-548c-9bfa-0fcf3613942a"
                    base_uuid_parts = base_codec_uuid.split("-")
                    if len(base_uuid_parts) >= 5:
                        base_uuid = "-".join(base_uuid_parts[:5])

                        # Convert bit_depth to int if it's a string
                        bit_depth_int = None
                        if video_bit_depth:
                            bit_depth_int = int(video_bit_depth) if isinstance(video_bit_depth, str) else video_bit_depth

                        # Build expanded UUID with format parameters:
                        # base UUID, then parameters, then the "-V" type suffix
                        params = []
                        if bit_depth_int:
                            params.append(f"{bit_depth_int}bit")
                        if video_pix_fmt:
                            params.append(video_pix_fmt)
                        if video_profile:
                            params.append(video_profile.lower())

                        if params:
                            param_suffix = "-".join(params)
                            video_codec_uuid = f"{base_uuid}-{param_suffix}-V"
                        else:
                            # No parameters, use base UUID with type suffix
                            video_codec_uuid = base_codec_uuid

                        LOG.debug(f"Generated expanded video codec UUID for {path.name}: {video_codec_uuid} (base={base_uuid}, codec={video_codec}, bit_depth={bit_depth_int}, pix_fmt={video_pix_fmt}, profile={video_profile})")
                    else:
                        LOG.warning(f"Base codec UUID has unexpected format for {path.name}: {base_codec_uuid}")
                        video_codec_uuid = base_codec_uuid  # Use as-is
                else:
                    # Unknown codec name: record it so the registry can grow.
                    UNKNOWN_MAPPINGS.register("ffprobe", video_codec, "video", path)
                    LOG.info("No UUID mapping found for ffprobe codec '%s' for %s", video_codec, path.name)
            except Exception as e:
                LOG.warning(f"Failed to generate expanded video codec UUID for {path.name}: {e}")
                # Fall back to no expanded UUID; container UUID is used instead
                video_codec_uuid = None

        # Generate expanded UUID for audio codec with format parameters
        # This provides granular format identification (e.g., AAC 48kHz vs 6kHz)
        # IMPORTANT: Use the translation layer to get the base codec UUID
        audio_codec_uuid = None
        if audio_codec:
            try:
                # Translate ffprobe codec name to base UUID using the unified translation layer
                base_audio_uuid = format_registry.lookup_format_uuid("ffprobe", audio_codec)
                if base_audio_uuid:
                    # Extract the base UUID (everything before the type suffix)
                    # E.g., "501331ba-42ea-561c-e5df-8a824df17e3f-A" → "501331ba-42ea-561c-e5df-8a824df17e3f"
                    base_uuid_parts = base_audio_uuid.split("-")
                    if len(base_uuid_parts) >= 5:
                        base_uuid = "-".join(base_uuid_parts[:5])

                        # Build expanded UUID with format parameters:
                        # base UUID, then parameters, then the "-A" type suffix
                        params = []
                        if audio_sample_rate:
                            params.append(str(audio_sample_rate))
                        if audio_sample_fmt:
                            params.append(audio_sample_fmt)

                        if params:
                            param_suffix = "-".join(params)
                            audio_codec_uuid = f"{base_uuid}-{param_suffix}-A"
                        else:
                            # No parameters, use base UUID with type suffix
                            audio_codec_uuid = base_audio_uuid

                        LOG.debug(f"Generated expanded audio codec UUID for {path.name}: {audio_codec_uuid} (base={base_uuid}, codec={audio_codec}, sample_rate={audio_sample_rate}, sample_fmt={audio_sample_fmt})")
                    else:
                        LOG.warning(f"Base audio codec UUID has unexpected format for {path.name}: {base_audio_uuid}")
                        audio_codec_uuid = base_audio_uuid  # Use as-is
                else:
                    UNKNOWN_MAPPINGS.register("ffprobe", audio_codec, "audio", path)
                    LOG.info("No UUID mapping found for ffprobe audio codec '%s' for %s", audio_codec, path.name)
            except Exception as e:
                LOG.warning(f"Failed to generate expanded audio codec UUID for {path.name}: {e}")
                # Fall back to None
                audio_codec_uuid = None

    # Candidate extensions for rule matching: consensus first, then the file's
    # own suffix, then None (extension-agnostic rules).
    extension_candidates: list[Optional[str]] = []
    if consensus:
        consensus_ext = canonicalize_extension(consensus.extension)  # Apply canonicalization to detected extension
        if consensus_ext:
            extension_candidates.append(consensus_ext)
    suffix_ext = canonicalize_extension(path.suffix)  # Apply canonicalization to file suffix
    if suffix_ext and suffix_ext not in extension_candidates:
        extension_candidates.append(suffix_ext)
    extension_candidates.append(None)

    rule: Optional[FormatRule] = None
    for candidate in extension_candidates:
        rule = match_rule(
            extension=candidate,
            libmagic=libmagic_values,
            puremagic=puremagic_values,
            pyfsig=pyfsig_values,
            binwalk=binwalk_values,
            rawpy=None,
            ffprobe_streams=ffprobe_tokens,
            animated=animated,
            size_bytes=size_bytes,
            psd_color_mode=psd_color_mode,
        )
        if rule:
            break

    # CRITICAL: JSON file is the SOLE source of truth for format identification
    # If UUID detection fails, the file is unidentified and must be rejected
    if not detected_uuid:
        LOG.debug(f"UUID detection failed for {path.name} - file not identified")
        return None, "non-media: format not identified by UUID system"

    # For video files, pass both container UUID and video codec UUID
    # This provides granular format identification (e.g., H.264 8-bit vs 10-bit)
    # while also checking container compatibility (MP4/MOV vs MKV)
    primary_uuid = detected_uuid
    container_uuid_param = None
    if detected_kind == "video" and "video_codec_uuid" in locals() and video_codec_uuid:
        # Use expanded video codec UUID as primary, pass container UUID separately
        primary_uuid = video_codec_uuid
        container_uuid_param = detected_uuid
        LOG.debug(f"Using expanded video codec UUID for {path.name}: {video_codec_uuid} (container UUID: {detected_uuid})")

    # UUID detected - determine action from JSON
    # Pass audio_codec_uuid instead of audio_codec to use UUID-based compatibility checking
    uuid_action = format_registry.get_format_action(primary_uuid, video_codec, audio_codec_uuid, container_uuid_param)
    if not uuid_action:
        # UUID identified but format is unsupported
        LOG.debug(f"UUID {primary_uuid} identified but unsupported for {path.name}")
        return None, f"non-media: unsupported format (UUID={primary_uuid})"

    # UUID system says this format is supported - use its action
    LOG.debug(f"UUID-based action for {path.name}: {uuid_action} (UUID={primary_uuid}, container={container_uuid_param})")

    # JSON is the sole source of truth - we already have uuid_action from above
    # Keep rule for metadata only (rule_id, notes, extensions for legacy compatibility)
    if not rule:
        # No rule found - but UUID system already approved it; reject with a
        # distinct reason so registry/rule drift is visible in logs
        LOG.warning(f"UUID {detected_uuid} approved but no format rule found for {path.name}")
        return None, f"no format rule found for detected UUID {detected_uuid}"

    # Use uuid_action as the effective action (JSON is authoritative)
    effective_action = uuid_action

    if rule.category == "vector":
        return None, "vector formats are not supported by Apple Photos"

    metadata: dict[str, Any] = {
        "rule_conditions": rule.conditions,
        "rule_notes": rule.notes,
        "detected_uuid": detected_uuid,
        "uuid_canonical_name": uuid_canonical_name,
        "uuid_compatible": uuid_compatible,
    }

    if rule.category == "raw":
        # RAW files get dedicated decoding dependencies and their own refinement path.
        raw_extensions = [path.suffix] + list(rule.extensions)
        install_raw_dependency_groups(collect_raw_groups_from_extensions(raw_extensions))
        raw_media, raw_reason = refine_raw_media(path, raw_extensions)
        if not raw_media:
            return None, raw_reason or "unsupported raw format"
        raw_media.rule_id = rule.rule_id
        raw_media.action = effective_action
        raw_media.requires_processing = effective_action != "import"
        raw_media.notes = rule.notes
        raw_media.metadata.update(metadata)
        return raw_media, None

    original_extension = canonicalize_extension(path.suffix)  # Apply canonicalization
    consensus_extension = canonicalize_extension(consensus.extension) if consensus else None  # Apply canonicalization
    preferred_extension = canonicalize_extension(rule.extensions[0]) if rule.extensions else None  # Apply canonicalization

    # NEVER change extension unless format detected differs from file extension
    # Priority: always keep original if valid, only use detected format if no extension or wrong extension
    if original_extension and rule.extensions and original_extension in rule.extensions:
        # Original extension is valid for the detected format - keep it!
        extension = original_extension
    elif original_extension:
        # File has extension but it doesn't match detected format - use detected format
        extension = consensus_extension or preferred_extension or original_extension or ".media"
    else:
        # File has no extension - use detected format
        extension = consensus_extension or preferred_extension or ".media"
    if detected_kind == "image":
        media = MediaFile(
            source=path,
            kind="image",
            extension=extension or ".img",
            format_name=(extension or ".img").lstrip("."),
            compatible=effective_action == "import",
            original_suffix=path.suffix,
            rule_id=rule.rule_id,
            action=effective_action,
            requires_processing=effective_action != "import",
            notes=rule.notes,
            metadata=metadata,
        )
        media.detected_compatible = media.compatible
        media.metadata.update(
            {
                "animated": animated,
                "size_bytes": size_bytes,
                "psd_color_mode": psd_color_mode,
            }
        )
        refined_media, refine_reason = refine_image_media(media, skip_compatibility_check)
        if refined_media is None:
            return None, refine_reason or "image validation failed"
        return refined_media, None

    if detected_kind == "video":
        media = MediaFile(
            source=path,
            kind="video",
            extension=extension or ".mp4",
            format_name=container or "video",
            compatible=effective_action == "import",
            video_codec=video_codec,
            audio_codec=audio_codec,
            audio_sample_rate=audio_sample_rate,
            audio_sample_fmt=audio_sample_fmt,
            original_suffix=path.suffix,
            rule_id=rule.rule_id,
            action=effective_action,
            requires_processing=effective_action != "import",
            notes=rule.notes,
            metadata=metadata,
        )
        media.detected_compatible = media.compatible
        media.metadata.update(
            {
                "container": container,
                "size_bytes": size_bytes,
                "audio_channels": audio_channels,
                "audio_layout": audio_layout,
                "audio_sample_rate": audio_sample_rate,
                "audio_sample_fmt": audio_sample_fmt,
            }
        )
        # Add normalized metadata from ffprobe (UUID-keyed fields)
        # This includes creation_time, artist, title, etc. with UUID keys
        if normalized_metadata:
            media.metadata.update(normalized_metadata)
        refined_media, refine_reason = refine_video_media(media, skip_compatibility_check)
        if refined_media is None:
            return None, refine_reason or "video validation failed"
        return refined_media, None

    return None, "unsupported format"
|
|
2757
|
+
|
|
2758
|
+
|
|
2759
|
+
def safe_filetype_guess(path: Path) -> Signature:
    """Probe *path* with the ``filetype`` library without ever raising.

    Returns an empty Signature when the library fails or has no opinion.
    """
    try:
        match = filetype.guess(str(path))
    except Exception:  # noqa: BLE001
        match = None
    if match is None:
        return Signature()
    return Signature(
        extension=normalize_extension(match.extension),
        mime=match.mime.lower() if match.mime else None,
    )
|
|
2769
|
+
|
|
2770
|
+
|
|
2771
|
+
def safe_puremagic_guess(path: Path) -> Signature:
    """Probe *path* with puremagic for extension and MIME, never raising.

    Each probe fails independently; a failed probe just leaves that field None.
    """
    ext: Optional[str] = None
    try:
        ext = normalize_extension(puremagic.from_file(str(path)))
    except puremagic.PureError:
        ext = None
    except Exception:  # noqa: BLE001
        ext = None

    mime: Optional[str] = None
    try:
        guessed = puremagic.from_file(str(path), mime=True)
        if guessed:
            mime = guessed.lower()
    except puremagic.PureError:
        mime = None
    except Exception:  # noqa: BLE001
        mime = None

    return Signature(extension=ext, mime=mime)
|
|
2788
|
+
|
|
2789
|
+
|
|
2790
|
+
def canonical_image_extension(name: Optional[str]) -> Optional[str]:
    """Map a raw extension token (with or without leading dot) to its canonical image extension."""
    if not name:
        return None
    return IMAGE_EXTENSION_MAP.get(name.lower().lstrip("."))
|
|
2795
|
+
|
|
2796
|
+
|
|
2797
|
+
def canonical_video_extension(name: Optional[str]) -> Optional[str]:
    """Map a raw extension token to its canonical video extension, if known."""
    key = normalize_extension(name)
    return VIDEO_EXTENSION_MAP.get(key) if key else None
|
|
2802
|
+
|
|
2803
|
+
|
|
2804
|
+
def is_archive_signature(sig: Signature) -> bool:
    """Return True when *sig* points at an archive rather than media."""
    if not sig or sig.is_empty():
        return False
    by_extension = bool(sig.extension) and sig.extension in ARCHIVE_EXTENSIONS
    by_mime = bool(sig.mime) and sig.mime in ARCHIVE_MIME_TYPES
    return by_extension or by_mime
|
|
2812
|
+
|
|
2813
|
+
|
|
2814
|
+
def is_image_signature(sig: Signature) -> bool:
    """Return True when *sig* carries image evidence (image/* MIME or known image extension)."""
    if not sig or sig.is_empty():
        return False
    if sig.mime and sig.mime.startswith("image/"):
        return True
    return bool(sig.extension) and sig.extension in ALL_IMAGE_EXTENSIONS
|
|
2822
|
+
|
|
2823
|
+
|
|
2824
|
+
def is_video_signature(sig: Signature) -> bool:
    """Return True when *sig* carries video evidence (video/* MIME or known video extension)."""
    if not sig or sig.is_empty():
        return False
    if sig.mime and sig.mime.startswith("video/"):
        return True
    return bool(sig.extension) and sig.extension in VIDEO_EXTENSION_HINTS
|
|
2832
|
+
|
|
2833
|
+
|
|
2834
|
+
def choose_image_extension(signatures: Iterable[Signature]) -> Optional[str]:
    """Pick a canonical image extension, preferring extension evidence over MIME mapping."""
    for sig in signatures:
        if (canonical := canonical_image_extension(sig.extension)):
            return canonical
    for sig in signatures:
        if not sig.mime:
            continue
        if (mapped := IMAGE_MIME_EXTENSION_MAP.get(sig.mime)):
            return mapped
    return None
|
|
2845
|
+
|
|
2846
|
+
|
|
2847
|
+
def choose_video_extension(signatures: Iterable[Signature]) -> Optional[str]:
    """Pick a canonical video extension, preferring extension evidence over MIME mapping."""
    for sig in signatures:
        if (canonical := canonical_video_extension(sig.extension)):
            return canonical
    for sig in signatures:
        if not sig.mime:
            continue
        if (mapped := VIDEO_MIME_EXTENSION_MAP.get(sig.mime)):
            return mapped
    return None
|
|
2858
|
+
|
|
2859
|
+
|
|
2860
|
+
def guess_extension(container: str, kind: str) -> Optional[str]:
    """Guess a file extension for a (container, kind) pair.

    Images go through IMAGE_EXTENSION_MAP; videos use a built-in
    container-to-extension table. Returns None for unknown containers.
    """
    token = container.lower()
    if kind == "image":
        return IMAGE_EXTENSION_MAP.get(token)
    container_to_extension = {
        "mov": ".mov",
        "quicktime": ".mov",
        "mp4": ".mp4",
        "m4v": ".m4v",
        "matroska": ".mkv",
        "webm": ".webm",
        "avi": ".avi",
        "3gpp": ".3gp",
        "mpegts": ".ts",
        "flv": ".flv",
    }
    return container_to_extension.get(token)
|
|
2877
|
+
|
|
2878
|
+
|
|
2879
|
+
def should_ignore(entry: Path) -> bool:
    """Return True when *entry* must be excluded from scanning.

    Excluded items:
      - FOUND_MEDIA_FILES_* staging directories
      - SMM runtime log directories (names starting with SMM_LOGS_SUBDIR)
      - DEBUG_raw_applescript_output_* / DEBUG_photos_output_* dumps
      - Photos_rejections_* reports
      - smm_run_* and smm_skipped_files_* legacy log files
      - .DS_Store macOS metadata
      - files already processed by SMM (name carries the __SMM staging token)
    """
    name = entry.name
    # Artifacts SMM itself produces: staging dirs, log dirs/files, debug dumps.
    smm_artifact_prefixes = (
        "FOUND_MEDIA_FILES_",
        SMM_LOGS_SUBDIR,
        "DEBUG_raw_applescript_output_",
        "DEBUG_photos_output_",
        "Photos_rejections_",
        "smm_run_",
        "smm_skipped_files_",
    )
    if name.startswith(smm_artifact_prefixes):
        return True
    # macOS Finder metadata.
    if name == ".DS_Store":
        return True
    # Files already processed by SMM carry the staging token in their name.
    if STAGING_TOKEN_PREFIX in name:
        LOG.debug("Skipping already-processed file: %s", name)
        return True
    return False
|
|
2912
|
+
|
|
2913
|
+
|
|
2914
|
+
def is_photos_managed_file(path: Path) -> bool:
    """Check if a file is managed by Apple Photos (has assetsd xattrs).

    Files imported into Apple Photos get extended attributes from the
    assetsd daemon. Such files are locked by the Photos database and cannot
    be moved or modified without causing sync issues or permission errors.

    Args:
        path: Path to check.

    Returns:
        True if the file has the com.apple.assetsd.UUID extended attribute.
    """
    marker = "com.apple.assetsd.UUID"
    try:
        import xattr  # type: ignore[import-not-found]
    except ImportError:
        # No Python xattr binding: shell out to the xattr CLI. Listing names
        # only (no -l) avoids decoding binary attribute values.
        try:
            proc = subprocess.run(
                ["xattr", str(path)],
                capture_output=True,
                text=True,
                timeout=5,
            )
            return marker in proc.stdout
        except Exception:
            return False
    try:
        return marker in xattr.listxattr(str(path))
    except Exception:
        return False
|
|
2948
|
+
|
|
2949
|
+
|
|
2950
|
+
def extract_live_photo_content_id(path: Path) -> Optional[str]:
    """Extract the Live Photo content identifier from a HEIC/MOV file via exiftool.

    Live Photos carry a ContentIdentifier that links the still image and the
    video clip; it is the key used for pairing detection.

    Args:
        path: Path to a HEIC or MOV file.

    Returns:
        The content identifier string, or None when exiftool is missing,
        fails, or reports no identifier.
    """
    exiftool = find_executable("exiftool")
    if not exiftool:
        LOG.debug("exiftool not available, skipping Live Photo content ID extraction")
        return None

    command = [exiftool, "-ContentIdentifier", "-b", str(path)]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
        if proc.returncode == 0:
            content_id = proc.stdout.strip()
            if content_id:
                LOG.debug("Extracted Live Photo content ID from %s: %s", path.name, content_id)
                return content_id
    except Exception as exc:
        LOG.debug("Failed to extract Live Photo content ID from %s: %s", path.name, exc)
    return None
|
|
2983
|
+
|
|
2984
|
+
|
|
2985
|
+
def is_panoramic_photo(path: Path) -> bool:
    """Detect whether a photo is panoramic via EXIF metadata.

    Panoramas carry tags such as ProjectionType, UsePanoramaViewer, or
    PoseHeadingDegrees; their values are scanned for common indicators.

    Args:
        path: Path to the image file.

    Returns:
        True when panoramic metadata is detected, False otherwise (including
        when exiftool is unavailable or the probe fails).
    """
    exiftool = find_executable("exiftool")
    if not exiftool:
        LOG.debug("exiftool not available, skipping panoramic photo detection")
        return False

    pano_indicators = ("equirectangular", "cylindrical", "spherical", "true", "360")
    command = [exiftool, "-ProjectionType", "-UsePanoramaViewer", "-PoseHeadingDegrees", "-b", str(path)]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
        if proc.returncode == 0:
            output = proc.stdout.strip().lower()
            if output and any(indicator in output for indicator in pano_indicators):
                LOG.debug("Detected panoramic photo: %s", path.name)
                return True
    except Exception as exc:
        LOG.debug("Failed to check panoramic metadata for %s: %s", path.name, exc)
    return False
|
|
3020
|
+
|
|
3021
|
+
|
|
3022
|
+
def detect_live_photo_pairs(media_files: list[MediaFile]) -> dict[str, tuple[MediaFile, MediaFile]]:
    """
    Find Live Photo pairs (still image + MOV clip) among detected media.

    A Live Photo consists of a HEIC/HEIF/JPG still and a MOV video that
    share the same filename stem (e.g. IMG_1234.HEIC + IMG_1234.MOV) and
    carry identical ContentIdentifier metadata. Matched files are annotated
    in place via their ``metadata`` dicts.

    Args:
        media_files: List of detected media files

    Returns:
        Dictionary mapping content_id -> (image_file, video_file) for each
        Live Photo pair found.
    """
    # Bucket files by filename stem.
    by_stem: dict[str, list[MediaFile]] = {}
    for item in media_files:
        by_stem.setdefault(item.source.stem, []).append(item)

    pairs: dict[str, tuple[MediaFile, MediaFile]] = {}
    still_exts = {".heic", ".heif", ".jpg", ".jpeg"}

    # Inspect each stem bucket for the still+MOV pattern.
    for group in by_stem.values():
        if len(group) < 2:
            continue

        stills = [m for m in group if m.kind == "image" and m.extension.lower() in still_exts]
        movs = [m for m in group if m.kind == "video" and m.extension.lower() == ".mov"]
        if not stills or not movs:
            continue

        # Match candidates on their embedded content identifier.
        for still in stills:
            still_id = extract_live_photo_content_id(still.source)
            if not still_id:
                continue

            for mov in movs:
                mov_id = extract_live_photo_content_id(mov.source)
                if not mov_id or mov_id != still_id:
                    continue

                # Found a Live Photo pair!
                LOG.debug("Detected Live Photo pair: %s + %s (content ID: %s)", still.source.name, mov.source.name, still_id)
                pairs[still_id] = (still, mov)

                # Record the pairing on both halves.
                still.metadata["is_live_photo"] = True
                still.metadata["live_photo_pair"] = str(mov.source)
                still.metadata["live_photo_content_id"] = still_id

                mov.metadata["is_live_photo"] = True
                mov.metadata["live_photo_pair"] = str(still.source)
                mov.metadata["live_photo_content_id"] = mov_id
                break

    return pairs
def gather_media_files(
    root: Path,
    recursive: bool,
    follow_symlinks: bool,
    skip_logger: SkipLogger,
    stats: RunStatistics,
    skip_compatibility_check: bool = False,
) -> list[MediaFile]:
    """Walk *root*, classify every file, and return the detected media files.

    Non-media, skippable, Photos-managed, and corrupt files are logged via
    *skip_logger* and counted in *stats*. After scanning, Live Photo pairs
    among the detected media are annotated.

    Args:
        root: Directory to scan.
        recursive: Descend into subdirectories when True; otherwise only
            direct children of *root* are considered.
        follow_symlinks: Follow symbolic links during traversal; symlinked
            files are skipped entirely when False.
        skip_logger: Sink for per-file skip reasons.
        stats: Mutable run counters updated in place.
        skip_compatibility_check: Passed through to detect_media to bypass
            the compatibility evaluation.

    Returns:
        List of MediaFile objects for every recognised media file.
    """
    media_files: list[MediaFile] = []

    def iter_candidate_files() -> Iterable[Path]:
        # Yields candidate file paths honoring the recursive/ignore settings.
        if recursive:
            for dirpath, dirnames, filenames in os.walk(root, followlinks=follow_symlinks):
                # Prune ignored directories in place so os.walk skips them.
                dirnames[:] = [d for d in dirnames if not should_ignore(Path(dirpath) / d)]
                for filename in filenames:
                    entry = Path(dirpath) / filename
                    if should_ignore(entry) or entry.is_dir():
                        continue
                    yield entry
        else:
            for entry in root.iterdir():
                if should_ignore(entry) or entry.is_dir():
                    continue
                yield entry

    scan_progress = ProgressReporter(0, "Scanning files")

    def handle_file(file_path: Path) -> None:
        # Classify a single file, updating stats/skip log or media_files.
        stats.total_files_scanned += 1

        if file_path.is_symlink() and not follow_symlinks:
            skip_logger.log(file_path, "symlink (use --follow-symlinks to allow)")
            stats.skipped_other += 1
            return
        if not file_path.is_file():
            return

        # Skip files managed by Apple Photos - they're already imported and locked
        if is_photos_managed_file(file_path):
            LOG.debug("Skipping Photos-managed file: %s", file_path.name)
            stats.skipped_other += 1
            return

        skippable_reason = is_skippable_file(file_path)
        if skippable_reason:
            skip_logger.log(file_path, skippable_reason)
            # Bucket the skip into the matching statistic by reason text.
            if "text file" in skippable_reason.lower():
                stats.total_text_files += 1
            elif "empty" in skippable_reason.lower() or "corrupt" in skippable_reason.lower():
                stats.skipped_corrupt_or_empty += 1
            else:
                stats.skipped_other += 1
            return

        # File is binary
        stats.total_binary_files += 1

        media, reject_reason = detect_media(file_path, skip_compatibility_check)
        if media:
            stats.total_media_detected += 1
            if media.compatible and media.action == "import":
                stats.media_compatible += 1
            else:
                stats.media_incompatible += 1
                # Any non-"skip" action implies a conversion rule exists.
                if media.action and not media.action.startswith("skip"):
                    stats.incompatible_with_conversion_rule += 1

            # Check for panoramic photos
            if media.kind == "image" and media.extension.lower() in {".heic", ".heif", ".jpg", ".jpeg"}:
                if is_panoramic_photo(file_path):
                    media.metadata["is_panoramic"] = True
                    LOG.debug("Detected panoramic photo: %s", file_path.name)

            media_files.append(media)
            return
        if reject_reason:
            reason_lower = reject_reason.lower()
            # A reason can be tagged non-media explicitly ("non-media:")
            # or implicitly via keyword matching.
            is_non_media = reason_lower.startswith("non-media:")
            if not is_non_media:
                is_non_media = any(keyword in reason_lower for keyword in NON_MEDIA_REASON_KEYWORDS)
            if "unknown" in reason_lower or "not recognised" in reason_lower:
                stats.skipped_unknown_format += 1
                log_reason = reject_reason
            elif "corrupt" in reason_lower or "empty" in reason_lower:
                stats.skipped_corrupt_or_empty += 1
                log_reason = reject_reason
            elif is_non_media:
                stats.skipped_non_media += 1
                # Strip the "non-media:" prefix for a cleaner log line.
                if reason_lower.startswith("non-media:") and ":" in reject_reason:
                    log_reason = reject_reason.split(":", 1)[1].strip()
                    if not log_reason:
                        log_reason = "non-media file"
                else:
                    log_reason = reject_reason
            else:
                stats.skipped_errors += 1
                log_reason = reject_reason
            # Non-media rejections are counted but not written to the skip log.
            if not is_non_media:
                skip_logger.log(file_path, log_reason)
            return

        # No media object and no reject reason: probe extension and magic
        # signatures to decide whether this looks like broken media.
        suffix = normalize_extension(file_path.suffix)
        signatures = [safe_filetype_guess(file_path), safe_puremagic_guess(file_path)]
        if (suffix and (suffix in ALL_IMAGE_EXTENSIONS or suffix in VIDEO_EXTENSION_HINTS)) or any(is_image_signature(sig) or is_video_signature(sig) for sig in signatures):
            skip_logger.log(file_path, "corrupt or unsupported media")
            stats.skipped_corrupt_or_empty += 1

    for file_path in iter_candidate_files():
        handle_file(file_path)
        scan_progress.update()

    scan_progress.finish()

    # Detect Live Photo pairs after all files are scanned
    if media_files:
        live_photo_pairs = detect_live_photo_pairs(media_files)
        if live_photo_pairs:
            LOG.debug("Found %d Live Photo pair(s)", len(live_photo_pairs))

    return media_files
def next_available_name(directory: Path, stem: str, extension: str) -> Path:
    """Return the first non-existing path ``directory/stem[_N]extension``.

    The plain name is tried first; subsequent attempts append ``_1``,
    ``_2``, ... until an unused filename is found.
    """
    candidate = directory / f"{stem}{extension}"
    attempt = 1
    while candidate.exists():
        candidate = directory / f"{stem}_{attempt}{extension}"
        attempt += 1
    return candidate
def build_safe_stem(original_stem: str, run_token: str, sequence: int) -> str:
    """Build an ASCII-only, length-limited stem carrying a per-run unique suffix.

    The stem is NFKD-normalised, stripped to ASCII, has unsafe characters
    collapsed to underscores, and is combined with the last characters of
    *run_token* plus a zero-padded *sequence* so the result is unique within
    the run and never exceeds MAX_SAFE_STEM_LENGTH.
    """
    # Strip accents / non-ASCII, then collapse unsafe runs into underscores.
    base = unicodedata.normalize("NFKD", original_stem).encode("ascii", "ignore").decode("ascii")
    base = re.sub(r"_+", "_", SAFE_NAME_PATTERN.sub("_", base)).strip("._- ")
    if not base:
        base = "media"

    fragment = run_token[-6:] if len(run_token) >= 6 else run_token
    if not fragment:
        fragment = "run"
    uniq = f"{fragment}{sequence:04d}"

    # Reserve room for "_<uniq>" while keeping at least 10 chars of the base.
    limit = max(10, MAX_SAFE_STEM_LENGTH - len(uniq) - 1)
    if len(base) > limit:
        base = base[:limit].rstrip("._- ") or "media"

    return f"{base}_{uniq}"[:MAX_SAFE_STEM_LENGTH]
def stem_needs_sanitization(stem: str) -> bool:
    """Return True when *stem* must be rewritten before staging.

    A stem needs sanitization when it is empty, contains characters matched
    by SAFE_NAME_PATTERN, exceeds MAX_SAFE_STEM_LENGTH, or carries leading
    or trailing whitespace.
    """
    if not stem:
        return True
    return (
        SAFE_NAME_PATTERN.search(stem) is not None
        or len(stem) > MAX_SAFE_STEM_LENGTH
        or stem.strip() != stem
    )
def _stem_length_budget(token_component: str, suffix: str, extension: str) -> int:
    """Return the maximum base-stem length that keeps the final staged filename
    within both MAX_SAFE_STEM_LENGTH and the Apple Photos filename limit."""
    return max(
        5,
        min(
            MAX_SAFE_STEM_LENGTH - len(token_component),
            MAX_PHOTOS_FILENAME_LENGTH - len(token_component) - len(suffix) - len(extension),
        ),
    )


def move_to_staging(
    media_files: Iterable[MediaFile],
    staging: Path,
    originals_dir: Path,
    copy_files: bool = False,
) -> None:
    """Stage media files with a unique sequential suffix for folder import.

    Every file gets a suffix like "_(1)", "_(2)", etc. before the extension,
    plus an embedded per-file staging token. This enables deterministic
    filename reconciliation after Photos import: the unique, predictable
    names can be matched against Photos' returned filenames to determine
    which files were imported vs skipped.

    Examples:
        photo.jpg → photo<token>_(1).jpg
        photo.jpg (from different subfolder) → photo<token>_(2).jpg
        IMG_1234.HEIC (Live Photo) → IMG_1234<token>_(1).HEIC
        IMG_1234.MOV (paired) → IMG_1234<token>_(2).MOV

    Args:
        media_files: Iterable of MediaFile objects to stage.
        staging: Path to staging directory (FOUND_MEDIA_FILES_*).
        originals_dir: Path to originals archive directory (SEPARATE from
            staging, not a subdirectory).
        copy_files: Copy instead of move; sources are left untouched.

    Note:
        Live Photo pairs keep a consistent base stem but receive different
        sequence suffixes since they are separate files.
    """
    originals_dir.mkdir(parents=True, exist_ok=True)
    media_list = list(media_files)

    # Global sequence counter for ALL files (starts at 1).
    sequence_counter = 1
    run_token = uuid.uuid4().hex

    # Track Live Photo pairs to ensure consistent naming.
    live_photo_stems: dict[str, str] = {}  # Maps content_id -> chosen_stem

    progress = ProgressReporter(len(media_list), "Staging media")
    for media in media_list:
        stem = media.source.stem.replace(" ", "_")  # Replace spaces to avoid Photos/import quirks

        if stem_needs_sanitization(stem):
            stem = build_safe_stem(stem, run_token, sequence_counter)

        token = uuid.uuid4().hex[:8]
        token_component = f"{STAGING_TOKEN_PREFIX}{token}__"

        # Precompute suffix now to enforce Apple Photos filename length limit; no spaces.
        suffix = f"_({sequence_counter})"

        # Enforce both safe-stem limit and Apple Photos filename length.
        max_base_len = _stem_length_budget(token_component, suffix, media.extension)
        if len(stem) > max_base_len:
            stem = stem[:max_base_len].rstrip("._- ") or "media"

        tokenized_stem = f"{stem}{token_component}"

        # Handle Live Photo pairs with consistent naming.
        if media.metadata.get("is_live_photo"):
            content_id = media.metadata.get("live_photo_content_id")
            if content_id:
                if content_id in live_photo_stems:
                    # Use the same stem as the paired file.
                    live_stem = live_photo_stems[content_id]
                    if stem != live_stem:
                        stem = live_stem
                        max_base_len = _stem_length_budget(token_component, suffix, media.extension)
                        if len(stem) > max_base_len:
                            stem = stem[:max_base_len].rstrip("._- ") or "media"
                        tokenized_stem = f"{stem}{token_component}"
                    LOG.debug("Using paired stem %s for Live Photo %s", stem, media.source.name)
                else:
                    # First file of the pair - store the sanitized stem for the paired file.
                    live_photo_stems[content_id] = stem
                    LOG.debug("Set stem %s for Live Photo pair (content ID: %s)", stem, content_id)

        unique_name = f"{tokenized_stem}{suffix}{media.extension}"
        destination = staging / unique_name

        # Handle collision (very unlikely with global counter, but safety net).
        # BUGFIX: collision names previously dropped the token component,
        # breaking token-based reconciliation after import; keep the
        # tokenized stem and record the suffix actually used.
        collision_counter = 1
        while destination.exists():
            collision_counter += 1
            suffix = f"_({sequence_counter}-{collision_counter})"
            unique_name = f"{tokenized_stem}{suffix}{media.extension}"
            destination = staging / unique_name

        media.metadata.setdefault("original_source", str(media.source))
        LOG.debug("%s %s -> %s", "Copying" if copy_files else "Moving", media.source, destination)
        try:
            if copy_files:
                shutil.copy2(str(media.source), str(destination))
            else:
                shutil.move(str(media.source), str(destination))
        except PermissionError as exc:
            # File might be locked by Apple Photos or another process.
            LOG.warning(
                "Permission denied for %s (may be locked by Photos): %s",
                media.source.name,
                exc,
            )
            media.stage_path = None
            media.metadata["staging_error"] = f"Permission denied: {exc}"
            progress.update()
            continue
        except OSError as exc:
            if exc.errno == 1:  # EPERM - Operation not permitted
                LOG.warning(
                    "Operation not permitted for %s (may be locked by Photos): %s",
                    media.source.name,
                    exc,
                )
                media.stage_path = None
                media.metadata["staging_error"] = f"Operation not permitted: {exc}"
                progress.update()
                continue
            raise
        media.stage_path = destination
        media.metadata["staging_stem"] = stem
        media.metadata["staging_suffix"] = suffix
        media.metadata["staging_name"] = destination.name
        media.metadata["staging_token"] = token
        media.metadata["staging_tokenized_stem"] = tokenized_stem
        media.metadata["copy_mode"] = copy_files
        sequence_counter += 1  # Increment for next file

        # Archive original if processing is required (before conversion).
        if media.requires_processing and not copy_files:
            # Use next_available_name for originals since they don't need reconciliation.
            original_target = next_available_name(originals_dir, stem, media.original_suffix or media.extension)
            try:
                shutil.copy2(destination, original_target)
                media.metadata["original_archive"] = str(original_target)
            except Exception as exc:  # noqa: BLE001
                LOG.warning("Failed to archive original %s: %s", destination, exc)

        progress.update()
    progress.finish()
def restore_media_file(media: MediaFile) -> None:
    """Undo staging for *media* when reverting changes after an error.

    In copy mode the original source was never touched, so the staged copy
    is simply deleted. In move mode the staged file is moved back to its
    (resolved) original location. No backups are involved.
    """
    staged = media.stage_path
    if media.metadata.get("copy_mode"):
        # Copy mode: the source is untouched; just drop the staged copy.
        if staged and staged.exists():
            staged.unlink()
        media.stage_path = None
        return

    restore_path = resolve_restore_path(media.source)
    restore_path.parent.mkdir(parents=True, exist_ok=True)
    if staged and staged.exists():
        staged.rename(restore_path)
        media.stage_path = None
def convert_image(media: MediaFile) -> None:
    """Convert image to JPEG format using ffmpeg.

    Converts directly from source to target without creating backups.
    On success the original staged file is deleted and the media record is
    updated to the JPEG; on failure the partial target is removed, the
    original is preserved, and the exception propagates.

    Raises:
        AssertionError: If media.stage_path is None.
    """
    assert media.stage_path is not None
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".jpg")

    # FIX: resolve the ffmpeg binary like every other converter in this
    # module instead of relying on a bare "ffmpeg" being on PATH.
    ffmpeg = ensure_ffmpeg_path()
    cmd = [
        ffmpeg,
        "-y",
        "-i",
        str(source),
        "-map_metadata",
        "0",  # carry source metadata into the JPEG
        "-c:v",
        "mjpeg",
        "-qscale:v",
        "2",  # high JPEG quality
        str(target),
    ]

    try:
        run_checked(cmd)
        # Conversion succeeded - delete original, use converted file
        source.unlink()
        media.stage_path = target
        media.extension = ".jpg"
        media.format_name = "jpeg"
        media.compatible = True
    except Exception:
        # Conversion failed - clean up partial target, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
def convert_video(media: MediaFile) -> None:
    """Convert video to H.264 MP4 format.

    Converts directly from source to target without creating backups.
    On success the original staged file is deleted and the media record is
    updated; on failure the partial target is removed, the original is
    preserved, and the exception propagates.

    Raises:
        AssertionError: If media.stage_path is None.
    """
    assert media.stage_path is not None
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".mp4")

    # FIX: resolve the ffmpeg binary like every other converter in this
    # module instead of relying on a bare "ffmpeg" being on PATH.
    ffmpeg = ensure_ffmpeg_path()
    cmd = [
        ffmpeg,
        "-y",
        "-i",
        str(source),
        "-map_metadata",
        "0",
        "-map",
        "0:v:0",
        "-c:v",
        "libx264",
        "-preset",
        "medium",
        "-crf",
        "18",  # visually lossless quality
        "-vf",
        "scale=trunc(iw/2)*2:trunc(ih/2)*2",  # H.264 requires even dimensions
        "-pix_fmt",
        "yuv420p",
        "-movflags",
        "+faststart",  # moov atom up front for progressive playback
    ]
    if media.audio_codec:
        cmd.extend(["-map", "0:a:0", "-c:a", "aac", "-b:a", "192k"])
    else:
        cmd.append("-an")
    cmd.append(str(target))

    try:
        run_checked(cmd)
        # Conversion succeeded - delete original, use converted file
        source.unlink()
        media.stage_path = target
        media.extension = ".mp4"
        media.format_name = "mp4"
        media.video_codec = "h264"
        media.audio_codec = "aac" if media.audio_codec else None
        media.compatible = True
    except Exception:
        # Conversion failed - clean up partial target, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
def convert_to_png(media: MediaFile) -> None:
    """Convert image to PNG format (lossless, widely supported).

    Fail-fast approach: no backups, no fallbacks.
    On success the original is deleted and media.stage_path updated; on
    failure the partial target is cleaned up, the original remains, and the
    exception propagates.

    Raises:
        RuntimeError: If media.stage_path is None.
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for PNG conversion")
    src = media.stage_path
    dst = next_available_name(src.parent, src.stem, ".png")

    # ffmpeg handles more input formats here than ImageMagick would.
    command = ["ffmpeg", "-y", "-i", str(src), "-pix_fmt", "rgba", str(dst)]
    try:
        run_command_with_progress(command, "Converting to PNG")
        copy_metadata_from_source(src, dst)
        src.unlink()  # Remove original only after a successful conversion
    except Exception:
        # Clean up partial target, keep original.
        with suppress(OSError):
            if dst.exists():
                dst.unlink()
        raise
    media.stage_path = dst
    media.extension = ".png"
    media.format_name = "png"
    media.requires_processing = False
    media.compatible = True
def convert_to_tiff(media: MediaFile) -> None:
    """Convert image to TIFF format (lossless, 16-bit depth).

    Fail-fast approach: no backups, no fallbacks.
    On success the original is deleted and media.stage_path updated; on
    failure the partial target is cleaned up, the original remains, and the
    exception propagates.

    Raises:
        RuntimeError: If media.stage_path is None.
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for TIFF conversion")
    src = media.stage_path
    dst = next_available_name(src.parent, src.stem, ".tiff")

    # ImageMagick conversion at 16-bit depth, flattened onto one layer.
    command = [
        resolve_imagemagick_command(),
        str(src),
        "-alpha", "on",
        "-depth", "16",
        "-flatten",
        str(dst),
    ]
    try:
        run_command_with_progress(command, "Converting to TIFF")
        copy_metadata_from_source(src, dst)
        src.unlink()  # Remove original only after a successful conversion
    except Exception:
        # Clean up partial target, keep original.
        with suppress(OSError):
            if dst.exists():
                dst.unlink()
        raise
    media.stage_path = dst
    media.extension = ".tiff"
    media.format_name = "tiff"
    media.requires_processing = False
    media.compatible = True
def convert_to_heic_lossless(media: MediaFile) -> None:
    """
    Convert media to lossless HEIC format using heif-enc or ffmpeg.

    Handles JPEG XL sources by first decoding to PNG via djxl, then encoding
    to HEIC. If djxl is unavailable for JXL input, falls back to TIFF
    conversion instead.

    Uses fail-fast approach: no backups, no fallbacks.
    On success: original file is deleted and media.stage_path updated.
    On failure: partial target and intermediate files are cleaned up,
    original remains, exception propagates.

    Raises:
        RuntimeError: If media.stage_path is None.
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for HEIC conversion")
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".heic")

    # Temporary PNG produced only for the JXL decode path; always removed
    # in the finally block below.
    intermediate: Optional[Path] = None
    try:
        if source.suffix.lower() == ".jxl":
            djxl = find_executable("djxl")
            if not djxl:
                # djxl not available - fall back to TIFF conversion instead
                LOG.warning("djxl not available; falling back to TIFF conversion")
                convert_to_tiff(media)
                return
            # Decode JXL to intermediate PNG for HEIC encoding
            fd, tmp_path = tempfile.mkstemp(suffix=".png", prefix="smm_jxl_")
            os.close(fd)  # only the path is needed; the tool writes the file
            intermediate = Path(tmp_path)
            run_command_with_progress(
                [djxl, str(source), str(intermediate), "--lossless"],
                "Decoding JPEG XL",
            )
            source_for_heic = intermediate
        else:
            source_for_heic = source

        # Encode to HEIC using heif-enc or ffmpeg. heif-enc is preferred,
        # but only for input formats it is known to accept.
        heif_enc = find_executable("heif-enc")
        if heif_enc and source_for_heic.suffix.lower() in {
            ".png",
            ".tif",
            ".tiff",
            ".jpg",
            ".jpeg",
            ".bmp",
        }:
            cmd = [heif_enc, "--lossless", str(source_for_heic), str(target)]
            run_command_with_progress(cmd, "Encoding HEIC (lossless)")
        else:
            # Fallback encoder: ffmpeg with lossless x265 and 10-bit 4:4:4.
            ffmpeg = ensure_ffmpeg_path()
            cmd = [
                ffmpeg,
                "-y",
                "-i",
                str(source_for_heic),
                "-c:v",
                "libx265",
                "-preset",
                "slow",
                "-x265-params",
                "lossless=1",
                "-pix_fmt",
                "yuv444p10le",
                str(target),
            ]
            run_command_with_progress(cmd, "Encoding HEIC via ffmpeg")

        # Conversion succeeded - copy metadata, delete original, update media
        copy_metadata_from_source(source, target)
        source.unlink()
        media.stage_path = target
        media.extension = ".heic"
        media.format_name = "heic"
        media.requires_processing = False
        media.compatible = True
    except Exception:
        # Clean up partial target and intermediate files, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
    finally:
        # Always clean up intermediate file if created
        if intermediate and intermediate.exists():
            with suppress(OSError):
                intermediate.unlink()
def convert_animation_to_hevc_mp4(media: MediaFile) -> None:
    """Convert animated media (GIF, APNG, etc.) to a HEVC-encoded MP4.

    Uses lossless HEVC with 10-bit YUV444 color to preserve visual quality.
    Audio tracks are dropped because Photos does not support audio in
    animated images. Fails fast on any error - no backups, no rollbacks.

    Args:
        media: MediaFile object with stage_path set to the file to convert

    Raises:
        RuntimeError: If stage_path is None
        CalledProcessError: If ffmpeg conversion fails
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for animation conversion")
    staged = media.stage_path
    output = next_available_name(staged.parent, staged.stem, ".mp4")
    encoder_cmd = [
        ensure_ffmpeg_path(),
        "-y",  # overwrite output file
        "-i", str(staged),
        "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2",  # HEVC needs even dimensions
        "-c:v", "libx265",  # HEVC video codec
        "-preset", "slow",  # better compression at cost of encode time
        "-x265-params", "lossless=1",  # preserve quality exactly
        "-pix_fmt", "yuv444p10le",  # 10-bit color for animations
        "-an",  # strip audio tracks
        str(output),
    ]
    run_command_with_progress(encoder_cmd, "Converting animation to HEVC")  # fail fast, no try/except
    staged.unlink()  # remove the original only after a successful encode
    media.stage_path = output
    media.extension = ".mp4"
    media.format_name = "mp4"
    media.video_codec = "hevc"
    media.audio_codec = None  # audio removed
    media.kind = "video"
    media.requires_processing = False
    media.compatible = True
def rewrap_to_mp4(media: MediaFile) -> None:
    """Rewrap the staged file into an MP4 container without re-encoding.

    All streams and metadata are stream-copied; the faststart flag places
    the moov atom up front for web-optimized playback. Fails fast on any
    error - no backups, no rollbacks.

    Args:
        media: MediaFile instance with valid stage_path

    Raises:
        RuntimeError: If stage_path is missing
        subprocess.CalledProcessError: If ffmpeg command fails
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for rewrap")
    staged = media.stage_path
    output = next_available_name(staged.parent, staged.stem, ".mp4")
    rewrap_cmd = [
        ensure_ffmpeg_path(),
        "-y",
        "-i", str(staged),
        "-c", "copy",  # no transcoding - pure container swap
        "-map", "0",  # keep every stream
        "-map_metadata", "0",  # keep container metadata
        "-movflags", "+faststart",
        str(output),
    ]
    run_command_with_progress(rewrap_cmd, "Rewrapping container")
    staged.unlink()  # remove original only after a successful rewrap
    media.stage_path = output
    media.extension = ".mp4"
    media.format_name = "mp4"
    media.requires_processing = False
    media.compatible = True
def transcode_to_hevc_mp4(media: MediaFile, copy_audio: bool = False) -> None:
    """Transcode the staged video to lossless HEVC (H.265) in an MP4 container.

    Video is encoded with libx265 in lossless mode (10-bit 4:2:0 pixel
    format); source metadata is carried over. Audio is either stream-copied
    or re-encoded to AAC at 256k depending on *copy_audio*.

    Fail-fast semantics, no backups: on success the original staged file is
    removed and *media* is updated to point at the new MP4; on failure any
    partial output is deleted, the original is kept, and the exception
    propagates.

    Args:
        media: MediaFile object with valid stage_path to be transcoded
        copy_audio: If True, copy audio stream as-is; if False, transcode to AAC 256k

    Raises:
        RuntimeError: If media.stage_path is None
        Exception: If ffmpeg transcoding fails (propagates after cleanup)
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for transcode")
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".mp4")
    ffmpeg = ensure_ffmpeg_path()
    # Choose the audio handling up front instead of appending later.
    audio_args = ["-c:a", "copy"] if copy_audio else ["-c:a", "aac", "-b:a", "256k"]
    cmd = [
        ffmpeg,
        "-y",
        "-i",
        str(source),
        "-c:v",
        "libx265",
        "-preset",
        "slow",
        "-x265-params",
        "lossless=1",
        "-pix_fmt",
        "yuv420p10le",
        "-map_metadata",
        "0",
        *audio_args,
        str(target),
    ]
    try:
        run_command_with_progress(cmd, "Transcoding to HEVC")
        # Transcoding succeeded - delete original, use transcoded file
        source.unlink()
        media.stage_path = target
        media.extension = ".mp4"
        media.format_name = "mp4"
        media.video_codec = "hevc"
        media.audio_codec = media.audio_codec if copy_audio else "aac"
        media.requires_processing = False
        media.compatible = True
    except Exception:
        # Transcoding failed - clean up partial target, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
|
|
3844
|
+
|
|
3845
|
+
|
|
3846
|
+
def transcode_audio_to_supported(media: MediaFile) -> None:
    """Transcode audio to supported codec (AAC or EAC3) in MP4 container.

    Video is stream-copied; only the audio track is re-encoded. Uses EAC3
    (768k) for 5.1/7.1 surround sound, AAC (256k) for stereo/mono, based on
    the probed channel count / layout stored in ``media.metadata``.

    Converts directly from source to target without creating backups.
    On success the original is deleted and *media* updated; on failure the
    partial target is removed and the original file is preserved.

    Args:
        media: MediaFile object with valid stage_path

    Raises:
        RuntimeError: If media.stage_path is None
        Exception: If ffmpeg fails (propagated after cleanup)
    """
    if media.stage_path is None:
        # FIX: was `assert media.stage_path is not None`, which is stripped
        # under `python -O`; raise explicitly like the sibling converters.
        raise RuntimeError("Stage path missing for audio transcode")
    source = media.stage_path
    target = next_available_name(source.parent, source.stem, ".mp4")
    ffmpeg = ensure_ffmpeg_path()
    channels = int(media.metadata.get("audio_channels", 0) or 0)
    layout = str(media.metadata.get("audio_layout", "") or "").lower()
    if channels >= 6 or "7.1" in layout or "5.1" in layout:
        audio_codec = "eac3"
        audio_args = ["-c:a", "eac3", "-b:a", "768k"]
    else:
        audio_codec = "aac"
        audio_args = ["-c:a", "aac", "-b:a", "256k"]
    cmd = (
        [
            ffmpeg,
            "-y",
            "-i",
            str(source),
            "-c:v",
            "copy",
        ]
        + audio_args
        + [
            "-map_metadata",
            "0",
            str(target),
        ]
    )
    try:
        run_command_with_progress(cmd, "Normalising audio codec")
        # Conversion succeeded - delete original, use converted file
        source.unlink()
        media.stage_path = target
        media.extension = ".mp4"
        media.format_name = "mp4"
        media.audio_codec = audio_codec
        media.requires_processing = False
        media.compatible = True
    except Exception:
        # Conversion failed - clean up partial target, keep original
        with suppress(OSError):
            if target.exists():
                target.unlink()
        raise
|
|
3897
|
+
|
|
3898
|
+
|
|
3899
|
+
def rewrap_or_transcode_to_mp4(media: MediaFile) -> None:
    """Rewrap video to MP4 container, falling back to an HEVC transcode.

    Fail-fast, no backups: the fast path copies all streams into an MP4
    container; if that fails, the file is transcoded to HEVC (libx265,
    CRF 23) with AAC audio. On success the original staged file is deleted
    and *media* updated; on total failure the partial output is removed,
    the original remains, and the exception propagates.
    """
    if media.stage_path is None:
        raise RuntimeError("Stage path missing for rewrap/transcode")
    source = media.stage_path
    ffmpeg = ensure_ffmpeg_path()

    def _adopt(new_path: Path) -> None:
        # Shared success bookkeeping: drop the original, adopt the new MP4.
        source.unlink()
        media.stage_path = new_path
        media.extension = ".mp4"
        media.format_name = "mp4"
        media.requires_processing = False
        media.compatible = True

    def _discard(partial: Path) -> None:
        # Remove a partial ffmpeg output, ignoring filesystem errors.
        with suppress(OSError):
            if partial.exists():
                partial.unlink()

    # First attempt: fast rewrap (copy all streams).
    target = next_available_name(source.parent, source.stem, ".mp4")
    rewrap_cmd = [
        ffmpeg,
        "-y",
        "-i",
        str(source),
        "-c",
        "copy",
        "-map",
        "0",
        "-map_metadata",
        "0",
        "-movflags",
        "+faststart",
        str(target),
    ]
    try:
        run_command_with_progress(rewrap_cmd, "Rewrapping to MP4")
        _adopt(target)
        return
    except Exception:
        # Rewrap failed - clean up and fall through to the transcode path.
        _discard(target)

    # Second attempt: transcode to HEVC.
    target = next_available_name(source.parent, source.stem, ".mp4")
    transcode_cmd = [
        ffmpeg,
        "-y",
        "-i",
        str(source),
        "-c:v",
        "libx265",
        "-preset",
        "medium",
        "-crf",
        "23",
        "-pix_fmt",
        "yuv420p",
        "-c:a",
        "aac",
        "-b:a",
        "192k",
        "-movflags",
        "+faststart",
        str(target),
    ]
    try:
        run_command_with_progress(transcode_cmd, "Transcoding to HEVC MP4")
        _adopt(target)
    except Exception:
        # Both attempts failed - remove the partial output, keep the original.
        _discard(target)
        raise
|
|
3983
|
+
|
|
3984
|
+
|
|
3985
|
+
def skip_unknown_video(media: MediaFile, skip_logger: SkipLogger) -> bool:
    """Record that *media* has an unsupported video format and undo staging.

    The skip is logged against the original source path, the staged copy is
    restored, and False is returned so the caller drops the file.
    """
    reason = "unsupported video format"
    skip_logger.log(media.source, reason)
    restore_media_file(media)
    return False
|
|
3989
|
+
|
|
3990
|
+
|
|
3991
|
+
def resolve_restore_path(path: Path) -> Path:
    """Return *path* itself when free, otherwise a collision-free variant.

    When the original location is already occupied, delegate to
    next_available_name() to pick a numbered alternative alongside it.
    """
    if path.exists():
        return next_available_name(path.parent, path.stem, path.suffix)
    return path
|
|
3995
|
+
|
|
3996
|
+
|
|
3997
|
+
def revert_media_files(media_files: Iterable[MediaFile], staging: Optional[Path]) -> None:
|
|
3998
|
+
for media in media_files:
|
|
3999
|
+
original = media.source
|
|
4000
|
+
try:
|
|
4001
|
+
if media.stage_path and media.stage_path.exists():
|
|
4002
|
+
if media.metadata.get("copy_mode"):
|
|
4003
|
+
media.stage_path.unlink(missing_ok=True)
|
|
4004
|
+
media.stage_path = None
|
|
4005
|
+
continue
|
|
4006
|
+
restore_path = resolve_restore_path(original)
|
|
4007
|
+
restore_path.parent.mkdir(parents=True, exist_ok=True)
|
|
4008
|
+
media.stage_path.rename(restore_path)
|
|
4009
|
+
media.stage_path = None
|
|
4010
|
+
except Exception as exc: # noqa: BLE001
|
|
4011
|
+
LOG.warning("Failed to restore %s: %s", original, exc)
|
|
4012
|
+
if staging and staging.exists():
|
|
4013
|
+
shutil.rmtree(staging, ignore_errors=True)
|
|
4014
|
+
|
|
4015
|
+
|
|
4016
|
+
def ensure_compatibility(
    media_files: list[MediaFile],
    skip_logger: SkipLogger,
    stats: RunStatistics,
    skip_convert: bool = False,
) -> None:
    """Convert or pass through each staged file so Photos can import it.

    Dispatches on ``media.action`` to the matching converter, tracking
    attempted/succeeded/failed counts on *stats*. Files that fail, are
    skipped, or whose staged copy vanished are dropped; *media_files* is
    mutated in place to hold only the retained entries.

    Args:
        media_files: Staged files to process; REPLACED in place with the
            retained subset on return.
        skip_logger: Sink for per-file skip/failure reasons.
        stats: Run counters updated as conversions are attempted.
        skip_convert: When True, mark everything compatible without
            converting (used for format testing).
    """
    retained: list[MediaFile] = []
    progress = ProgressReporter(len(media_files), "Ensuring compatibility")

    def is_already_photos_compatible(media: MediaFile) -> bool:
        # Heuristic second guard beyond detected_compatible: extension check
        # for images, container+codec check for videos. Anything else
        # (audio, unknown kinds) is treated as not compatible.
        if media.kind == "image":
            return media.extension.lower() in COMPATIBLE_IMAGE_EXTENSIONS
        if media.kind == "video":
            container = (media.metadata.get("container") or media.format_name or "").lower()
            video_codec = (media.video_codec or "").lower()
            audio_codec = (media.audio_codec or "").lower()
            # A missing audio track (empty codec string) is acceptable.
            return container in COMPATIBLE_VIDEO_CONTAINERS and video_codec in COMPATIBLE_VIDEO_CODECS and (not audio_codec or audio_codec in COMPATIBLE_AUDIO_CODECS)
        return False

    for media in media_files:
        if media.stage_path is None or not media.stage_path.exists():
            skip_logger.log(media.source, "staged file missing before processing")
            progress.update()
            continue

        if media.action == "skip_vector":
            skip_logger.log(media.source, "vector artwork not supported")
            restore_media_file(media)
            progress.update()
            continue

        if media.action == "skip_unknown_video":
            # skip_unknown_video logs, restores, and returns False, so this
            # branch always takes the continue path (file is dropped).
            if not skip_unknown_video(media, skip_logger):
                progress.update()
                continue

        # Skip all conversions if flag is set (for format testing)
        if skip_convert:
            # Mark as compatible and treat as import-ready
            media.requires_processing = False
            media.compatible = True
            retained.append(media)
            progress.update()
            continue

        try:
            # Do not process files the detector already marked as compatible
            if media.detected_compatible and media.action != "import":
                LOG.debug("Bypassing processing for compatible media %s (action %s)", media.stage_path, media.action)
                media.requires_processing = False
                media.compatible = True
                media.action = "import"
                retained.append(media)
                progress.update()
                continue

            # Extra guard: heuristically skip conversion if container/codec are Photos-compatible
            if is_already_photos_compatible(media):
                media.requires_processing = False
                media.compatible = True
                media.action = "import"
                retained.append(media)
                progress.update()
                continue

            # Action dispatch: each converter mutates `media` in place
            # (stage_path, extension, codec fields) on success and raises on
            # failure, which routes to the except block below.
            if media.action == "import":
                media.requires_processing = False
                media.compatible = True
            elif media.action == "convert_to_png":
                LOG.debug("Converting %s to PNG: %s", media.format_name, media.stage_path)
                stats.conversion_attempted += 1
                convert_to_png(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted to PNG: %s", media.stage_path)
            elif media.action == "convert_to_tiff":
                LOG.debug("Converting %s to TIFF: %s", media.format_name, media.stage_path)
                stats.conversion_attempted += 1
                convert_to_tiff(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted to TIFF: %s", media.stage_path)
            elif media.action == "convert_to_heic_lossless":
                LOG.debug("Converting %s to lossless HEIC: %s", media.format_name, media.stage_path)
                stats.conversion_attempted += 1
                convert_to_heic_lossless(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted to HEIC: %s", media.stage_path)
            elif media.action == "convert_animation_to_hevc_mp4":
                LOG.debug("Converting animated %s to HEVC MP4: %s", media.format_name, media.stage_path)
                stats.conversion_attempted += 1
                convert_animation_to_hevc_mp4(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted animation to HEVC MP4: %s", media.stage_path)
            elif media.action == "rewrap_to_mp4":
                LOG.debug("Rewrapping %s (%s/%s) to MP4 container: %s", media.format_name, media.video_codec or "unknown", media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                rewrap_to_mp4(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully rewrapped to MP4: %s", media.stage_path)
            elif media.action == "transcode_to_hevc_mp4":
                LOG.debug("Transcoding %s (%s/%s) to HEVC MP4: %s", media.format_name, media.video_codec or "unknown", media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                transcode_to_hevc_mp4(media, copy_audio=False)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully transcoded to HEVC MP4: %s", media.stage_path)
            elif media.action == "transcode_video_to_lossless_hevc":
                # Same transcoder, but the audio track is stream-copied.
                LOG.debug("Transcoding %s (%s/%s) to lossless HEVC MP4: %s", media.format_name, media.video_codec or "unknown", media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                transcode_to_hevc_mp4(media, copy_audio=True)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully transcoded to lossless HEVC MP4: %s", media.stage_path)
            elif media.action == "transcode_audio_to_aac_or_eac3":
                LOG.debug("Transcoding audio in %s (%s) to AAC/EAC-3: %s", media.format_name, media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                transcode_audio_to_supported(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully transcoded audio: %s", media.stage_path)
            elif media.action == "rewrap_or_transcode_to_mp4":
                LOG.debug("Rewrapping/transcoding %s (%s/%s) to MP4: %s", media.format_name, media.video_codec or "unknown", media.audio_codec or "unknown", media.stage_path)
                stats.conversion_attempted += 1
                rewrap_or_transcode_to_mp4(media)
                stats.conversion_succeeded += 1
                media.was_converted = True
                LOG.debug("Successfully converted to MP4: %s", media.stage_path)
            else:
                # Default: keep and log unknown action
                skip_logger.log(media.source, f"unhandled action {media.action}, treating as import")
                media.requires_processing = False
                media.compatible = True
        except Exception as exc:  # noqa: BLE001
            # Any converter failure: count it, log it, put the file back,
            # and drop it from the retained set.
            stats.conversion_failed += 1
            skip_logger.log(media.source, f"processing failed: {exc}")
            restore_media_file(media)
            progress.update()
            continue

        retained.append(media)
        progress.update()

    # Replace the caller's list contents with the survivors.
    media_files[:] = retained
    progress.finish()
|
|
4164
|
+
|
|
4165
|
+
|
|
4166
|
+
def update_stats_after_compatibility(stats: RunStatistics, media_files: list[MediaFile]) -> None:
|
|
4167
|
+
stats.total_media_detected = len(media_files)
|
|
4168
|
+
detected_compatible = sum(1 for media in media_files if media.detected_compatible)
|
|
4169
|
+
stats.media_compatible = detected_compatible
|
|
4170
|
+
stats.media_incompatible = stats.total_media_detected - detected_compatible
|
|
4171
|
+
stats.incompatible_with_conversion_rule = sum(1 for media in media_files if not media.detected_compatible and media.was_converted)
|
|
4172
|
+
stats.staging_total = sum(1 for media in media_files if media.stage_path and media.stage_path.exists())
|
|
4173
|
+
stats.staging_expected = detected_compatible + stats.incompatible_with_conversion_rule
|
|
4174
|
+
|
|
4175
|
+
|
|
4176
|
+
def run_checked(cmd: list[str]) -> None:
    """Run *cmd* and raise RuntimeError (with captured stderr) on failure.

    Output is captured rather than streamed; for long-running ffmpeg jobs
    use run_command_with_progress instead.

    Args:
        cmd: Command argv list; cmd[0] is the executable.

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    LOG.debug("Executing command: %s", " ".join(cmd))
    result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    if result.returncode != 0:
        detail = result.stderr.strip()
        LOG.error("Command failed: %s", detail)
        # FIX: include stderr in the exception so callers (and skip logs) see
        # WHY the command failed, not just its exit code.
        raise RuntimeError(f"Command '{cmd[0]}' failed with exit code {result.returncode}: {detail}")
|
|
4182
|
+
|
|
4183
|
+
|
|
4184
|
+
def import_folder_to_photos(
|
|
4185
|
+
staging_dir: Path,
|
|
4186
|
+
media_files: list[MediaFile],
|
|
4187
|
+
album_name: str,
|
|
4188
|
+
skip_duplicates: bool = True,
|
|
4189
|
+
) -> tuple[int, int, list[MediaFile]]:
|
|
4190
|
+
"""Import entire staging folder in a single Photos.app call.
|
|
4191
|
+
|
|
4192
|
+
Uses Photos' native folder import which handles queue management natively.
|
|
4193
|
+
Returns imported filenames and reconciles against staged files to determine
|
|
4194
|
+
which files were imported vs skipped.
|
|
4195
|
+
|
|
4196
|
+
This eliminates ALL timing dependencies and batch management complexity.
|
|
4197
|
+
Photos.app manages its own import queue, preventing resource exhaustion.
|
|
4198
|
+
|
|
4199
|
+
Args:
|
|
4200
|
+
staging_dir: Path to staging folder containing all media files
|
|
4201
|
+
media_files: List of MediaFile objects with stage_path set
|
|
4202
|
+
album_name: Photos album name to import into
|
|
4203
|
+
skip_duplicates: If True, skip duplicate checking (default: True)
|
|
4204
|
+
|
|
4205
|
+
Returns:
|
|
4206
|
+
Tuple of (imported_count, skipped_count, skipped_media_files)
|
|
4207
|
+
|
|
4208
|
+
Raises:
|
|
4209
|
+
RuntimeError: If Photos.app import fails with error
|
|
4210
|
+
"""
|
|
4211
|
+
# DEBUG: Timestamp when function execution begins
|
|
4212
|
+
function_start_timestamp = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
|
4213
|
+
LOG.debug("✅ TIMESTAMP %s - import_folder_to_photos function EXECUTION STARTED", function_start_timestamp)
|
|
4214
|
+
|
|
4215
|
+
staged_media = [media for media in media_files if media.stage_path and media.stage_path.exists()]
|
|
4216
|
+
if not staged_media:
|
|
4217
|
+
return 0, 0, []
|
|
4218
|
+
|
|
4219
|
+
# Build AppleScript for folder import (based on import2photos.sh)
|
|
4220
|
+
# Uses 24-hour timeout to prevent AppleEvent timeout (-1712) when Photos shows dialogs
|
|
4221
|
+
applescript = """
|
|
4222
|
+
on run argv
|
|
4223
|
+
if (count of argv) < 3 then return "ERR\\t0\\tMissing arguments"
|
|
4224
|
+
set albumName to item 1 of argv
|
|
4225
|
+
set skipDup to ((item 2 of argv) is "true")
|
|
4226
|
+
set dirPath to item 3 of argv
|
|
4227
|
+
|
|
4228
|
+
script util
|
|
4229
|
+
on sanitizeText(srcText)
|
|
4230
|
+
set oldTIDs to AppleScript's text item delimiters
|
|
4231
|
+
set AppleScript's text item delimiters to {return, linefeed, tab}
|
|
4232
|
+
set parts to text items of srcText
|
|
4233
|
+
set AppleScript's text item delimiters to " "
|
|
4234
|
+
set out to parts as text
|
|
4235
|
+
set AppleScript's text item delimiters to oldTIDs
|
|
4236
|
+
return out
|
|
4237
|
+
end sanitizeText
|
|
4238
|
+
end script
|
|
4239
|
+
|
|
4240
|
+
try
|
|
4241
|
+
set folderAlias to POSIX file (dirPath as text)
|
|
4242
|
+
on error errMsg number errNum
|
|
4243
|
+
return "ERR\\t" & (errNum as text) & "\\t" & errMsg
|
|
4244
|
+
end try
|
|
4245
|
+
|
|
4246
|
+
set outLines to {}
|
|
4247
|
+
tell application id "com.apple.Photos"
|
|
4248
|
+
activate
|
|
4249
|
+
-- Use very long timeout (24 hours) to allow user interaction with Photos dialogs
|
|
4250
|
+
with timeout of 86400 seconds
|
|
4251
|
+
try
|
|
4252
|
+
if (count of (albums whose name is albumName)) = 0 then
|
|
4253
|
+
make new album named albumName
|
|
4254
|
+
end if
|
|
4255
|
+
set tgtAlbum to first album whose name is albumName
|
|
4256
|
+
|
|
4257
|
+
-- SINGLE folder import call - Photos manages the queue natively
|
|
4258
|
+
set importedItems to import folderAlias skip check duplicates skipDup
|
|
4259
|
+
|
|
4260
|
+
if (count of importedItems) > 0 then
|
|
4261
|
+
add importedItems to tgtAlbum
|
|
4262
|
+
end if
|
|
4263
|
+
|
|
4264
|
+
-- Return filenames of imported items for reconciliation
|
|
4265
|
+
repeat with mi in importedItems
|
|
4266
|
+
try
|
|
4267
|
+
set fn to filename of mi
|
|
4268
|
+
set fn2 to util's sanitizeText(fn)
|
|
4269
|
+
set end of outLines to "FN\\t" & fn2
|
|
4270
|
+
end try
|
|
4271
|
+
end repeat
|
|
4272
|
+
on error errMsg number errNum
|
|
4273
|
+
return "ERR\\t" & (errNum as text) & "\\t" & errMsg
|
|
4274
|
+
end try
|
|
4275
|
+
end timeout
|
|
4276
|
+
end tell
|
|
4277
|
+
|
|
4278
|
+
set oldTIDs to AppleScript's text item delimiters
|
|
4279
|
+
set AppleScript's text item delimiters to linefeed
|
|
4280
|
+
set outText to outLines as text
|
|
4281
|
+
set AppleScript's text item delimiters to oldTIDs
|
|
4282
|
+
return outText
|
|
4283
|
+
end run
|
|
4284
|
+
"""
|
|
4285
|
+
|
|
4286
|
+
# Execute AppleScript with folder import - with retry logic for Photos dialogs
|
|
4287
|
+
LOG.info("Importing staging folder into Photos album '%s'...", album_name)
|
|
4288
|
+
|
|
4289
|
+
def run_import_applescript() -> subprocess.CompletedProcess[str]:
|
|
4290
|
+
"""Execute the import AppleScript. No timeout - AppleScript has its own 24h timeout."""
|
|
4291
|
+
return subprocess.run(
|
|
4292
|
+
["osascript", "-", album_name, str(skip_duplicates).lower(), str(staging_dir)],
|
|
4293
|
+
input=applescript,
|
|
4294
|
+
capture_output=True,
|
|
4295
|
+
text=True,
|
|
4296
|
+
check=False,
|
|
4297
|
+
)
|
|
4298
|
+
|
|
4299
|
+
# Retry loop for AppleEvent timeout (-1712) when Photos shows dialogs
|
|
4300
|
+
max_retries = 10
|
|
4301
|
+
for attempt in range(max_retries):
|
|
4302
|
+
# DEBUG: Timestamp when AppleScript execution begins
|
|
4303
|
+
applescript_start_timestamp = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
|
4304
|
+
LOG.debug("📸 TIMESTAMP %s - About to execute AppleScript (osascript) to import folder to Photos.app (attempt %d/%d)", applescript_start_timestamp, attempt + 1, max_retries)
|
|
4305
|
+
|
|
4306
|
+
result = run_import_applescript()
|
|
4307
|
+
|
|
4308
|
+
# DEBUG: Timestamp when AppleScript execution completes
|
|
4309
|
+
applescript_end_timestamp = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
|
4310
|
+
LOG.debug("📸 TIMESTAMP %s - AppleScript execution COMPLETED", applescript_end_timestamp)
|
|
4311
|
+
|
|
4312
|
+
output = result.stdout.strip()
|
|
4313
|
+
|
|
4314
|
+
if LOG.isEnabledFor(logging.DEBUG):
|
|
4315
|
+
debug_parent = _log_directory() or staging_dir.parent
|
|
4316
|
+
timestamp_segment = dt.datetime.now().strftime("%Y%m%d%H%M%S")
|
|
4317
|
+
debug_parent.mkdir(parents=True, exist_ok=True)
|
|
4318
|
+
raw_output_file = debug_parent / f"DEBUG_raw_applescript_output_{timestamp_segment}.txt"
|
|
4319
|
+
with raw_output_file.open("wb") as binary_handle:
|
|
4320
|
+
binary_handle.write(result.stdout.encode("utf-8"))
|
|
4321
|
+
LOG.debug("DEBUG: Raw AppleScript output saved to %s (%d bytes)", raw_output_file, len(result.stdout))
|
|
4322
|
+
|
|
4323
|
+
# Check for AppleEvent timeout error (-1712) - Photos was showing a dialog
|
|
4324
|
+
if output.startswith("ERR\t"):
|
|
4325
|
+
parts = output.split("\t")
|
|
4326
|
+
err_code = parts[1] if len(parts) > 1 else "0"
|
|
4327
|
+
err_msg = parts[2] if len(parts) > 2 else "Unknown error"
|
|
4328
|
+
|
|
4329
|
+
# Error -1712 is "AppleEvent timed out" - Photos was waiting for user interaction
|
|
4330
|
+
if err_code == "-1712":
|
|
4331
|
+
print("\n⚠️ Apple Photos is waiting for user interaction (dialog open)")
|
|
4332
|
+
print(" Please close any Photos dialogs and press Enter to retry...")
|
|
4333
|
+
print(" Or type 'abort' to cancel the import.")
|
|
4334
|
+
try:
|
|
4335
|
+
user_input = input(f" [Attempt {attempt + 1}/{max_retries}] Press Enter to retry or 'abort' to cancel: ").strip().lower()
|
|
4336
|
+
if user_input == "abort":
|
|
4337
|
+
raise RuntimeError(f"Photos import aborted by user after AppleEvent timeout [{err_code}]: {err_msg}")
|
|
4338
|
+
LOG.info("Retrying Photos import after user closed dialog...")
|
|
4339
|
+
continue # Retry the import
|
|
4340
|
+
except (KeyboardInterrupt, EOFError):
|
|
4341
|
+
raise RuntimeError(f"Photos import cancelled by user [{err_code}]: {err_msg}")
|
|
4342
|
+
|
|
4343
|
+
# Other errors are fatal
|
|
4344
|
+
raise RuntimeError(f"Photos import failed [{err_code}]: {err_msg}")
|
|
4345
|
+
|
|
4346
|
+
# Success - no error, break out of retry loop
|
|
4347
|
+
break
|
|
4348
|
+
else:
|
|
4349
|
+
# Exhausted all retries
|
|
4350
|
+
raise RuntimeError(f"Photos import failed after {max_retries} attempts due to repeated AppleEvent timeouts")
|
|
4351
|
+
|
|
4352
|
+
# Parse imported filenames from AppleScript output
|
|
4353
|
+
# Format: "FN\t<filename>" per line
|
|
4354
|
+
imported_names = []
|
|
4355
|
+
line_count = 0
|
|
4356
|
+
for line in output.split("\n"):
|
|
4357
|
+
line_count += 1
|
|
4358
|
+
line = line.strip()
|
|
4359
|
+
if line.startswith("FN\t"):
|
|
4360
|
+
filename = line[3:] # Remove "FN\t" prefix
|
|
4361
|
+
imported_names.append(filename)
|
|
4362
|
+
|
|
4363
|
+
LOG.debug(f"DEBUG: Parsed {len(imported_names)} filenames from {line_count} total lines")
|
|
4364
|
+
|
|
4365
|
+
LOG.debug("Photos returned %d imported filenames", len(imported_names))
|
|
4366
|
+
|
|
4367
|
+
if LOG.isEnabledFor(logging.DEBUG):
|
|
4368
|
+
debug_parent = _log_directory() or staging_dir.parent
|
|
4369
|
+
timestamp_segment = dt.datetime.now().strftime("%Y%m%d%H%M%S")
|
|
4370
|
+
debug_parent.mkdir(parents=True, exist_ok=True)
|
|
4371
|
+
photos_output_file = debug_parent / f"DEBUG_photos_output_{timestamp_segment}.txt"
|
|
4372
|
+
with photos_output_file.open("w", encoding="utf-8") as text_handle:
|
|
4373
|
+
text_handle.write("FILENAMES RETURNED BY PHOTOS.APP:\n")
|
|
4374
|
+
text_handle.write("=" * 80 + "\n")
|
|
4375
|
+
for name in sorted(imported_names):
|
|
4376
|
+
text_handle.write(f"{name}\n")
|
|
4377
|
+
LOG.debug("DEBUG: Photos output saved to %s", photos_output_file)
|
|
4378
|
+
|
|
4379
|
+
# DEBUG: Log first 5 filenames returned by Photos
|
|
4380
|
+
LOG.debug("DEBUG: First 5 filenames returned by Photos:")
|
|
4381
|
+
for i, name in enumerate(imported_names[:5]):
|
|
4382
|
+
LOG.debug(f" [{i}] {repr(name)}")
|
|
4383
|
+
|
|
4384
|
+
photos_imported_count = len(imported_names)
|
|
4385
|
+
staged_count = len(staged_media)
|
|
4386
|
+
|
|
4387
|
+
LOG.debug("Reconciliation: Photos returned %d items, staged %d files", photos_imported_count, staged_count)
|
|
4388
|
+
|
|
4389
|
+
token_to_media: dict[str, MediaFile] = {}
|
|
4390
|
+
for media in staged_media:
|
|
4391
|
+
token_value = media.metadata.get("staging_token")
|
|
4392
|
+
if token_value:
|
|
4393
|
+
token_to_media[token_value] = media
|
|
4394
|
+
|
|
4395
|
+
imported_media: list[MediaFile] = []
|
|
4396
|
+
skipped_media: list[MediaFile] = []
|
|
4397
|
+
matched_media_ids: set[int] = set()
|
|
4398
|
+
unmatched_names: list[str] = []
|
|
4399
|
+
|
|
4400
|
+
for name in imported_names:
|
|
4401
|
+
tokens = [match.group(1) for match in STAGING_TOKEN_PATTERN.finditer(name)]
|
|
4402
|
+
assigned = False
|
|
4403
|
+
for token_value in tokens:
|
|
4404
|
+
matched_media = token_to_media.get(token_value)
|
|
4405
|
+
if matched_media and id(matched_media) not in matched_media_ids:
|
|
4406
|
+
token_to_media.pop(token_value, None)
|
|
4407
|
+
matched_media_ids.add(id(matched_media))
|
|
4408
|
+
matched_media.metadata["photos_returned_name"] = name
|
|
4409
|
+
imported_media.append(matched_media)
|
|
4410
|
+
assigned = True
|
|
4411
|
+
break
|
|
4412
|
+
if not assigned:
|
|
4413
|
+
unmatched_names.append(name)
|
|
4414
|
+
|
|
4415
|
+
remaining_media = [media for media in staged_media if id(media) not in matched_media_ids]
|
|
4416
|
+
imported_counter: Counter[str] = Counter(unmatched_names)
|
|
4417
|
+
|
|
4418
|
+
def consume_exact(name: str) -> Optional[str]:
|
|
4419
|
+
if imported_counter.get(name, 0) > 0:
|
|
4420
|
+
imported_counter[name] -= 1
|
|
4421
|
+
if imported_counter[name] == 0:
|
|
4422
|
+
del imported_counter[name]
|
|
4423
|
+
return name
|
|
4424
|
+
return None
|
|
4425
|
+
|
|
4426
|
+
def consume_casefold(name: str) -> Optional[str]:
|
|
4427
|
+
lowered = name.casefold()
|
|
4428
|
+
for candidate in list(imported_counter.keys()):
|
|
4429
|
+
if imported_counter[candidate] > 0 and candidate.casefold() == lowered:
|
|
4430
|
+
imported_counter[candidate] -= 1
|
|
4431
|
+
if imported_counter[candidate] == 0:
|
|
4432
|
+
del imported_counter[candidate]
|
|
4433
|
+
return candidate
|
|
4434
|
+
return None
|
|
4435
|
+
|
|
4436
|
+
def consume_name(name: str) -> Optional[str]:
|
|
4437
|
+
return consume_exact(name) or consume_casefold(name)
|
|
4438
|
+
|
|
4439
|
+
name_suffix_pattern = re.compile(r"^(.*)[ _]?\([0-9-]+\)(\.[^.]+)$")
|
|
4440
|
+
|
|
4441
|
+
def strip_staging_suffix(name: str) -> Optional[str]:
|
|
4442
|
+
match = name_suffix_pattern.match(name)
|
|
4443
|
+
if match:
|
|
4444
|
+
return f"{match.group(1)}{match.group(2)}"
|
|
4445
|
+
return None
|
|
4446
|
+
|
|
4447
|
+
for media in remaining_media:
|
|
4448
|
+
stage_path = media.stage_path
|
|
4449
|
+
if stage_path is None:
|
|
4450
|
+
skipped_media.append(media)
|
|
4451
|
+
continue
|
|
4452
|
+
stage_name = stage_path.name
|
|
4453
|
+
candidates = [stage_name]
|
|
4454
|
+
staging_stem = media.metadata.get("staging_stem")
|
|
4455
|
+
if staging_stem:
|
|
4456
|
+
base_candidate = f"{staging_stem}{media.extension}" if media.extension else staging_stem
|
|
4457
|
+
if base_candidate not in candidates:
|
|
4458
|
+
candidates.append(base_candidate)
|
|
4459
|
+
tokenized_stem = media.metadata.get("staging_tokenized_stem")
|
|
4460
|
+
if tokenized_stem:
|
|
4461
|
+
token_base = f"{tokenized_stem}{media.extension}" if media.extension else tokenized_stem
|
|
4462
|
+
if token_base not in candidates:
|
|
4463
|
+
candidates.append(token_base)
|
|
4464
|
+
if tokenized_stem.endswith("__"):
|
|
4465
|
+
single_variant = tokenized_stem[:-1]
|
|
4466
|
+
token_base_single = f"{single_variant}{media.extension}" if media.extension else single_variant
|
|
4467
|
+
if token_base_single not in candidates:
|
|
4468
|
+
candidates.append(token_base_single)
|
|
4469
|
+
single_stage = stage_name.replace(tokenized_stem, single_variant)
|
|
4470
|
+
if single_stage not in candidates:
|
|
4471
|
+
candidates.append(single_stage)
|
|
4472
|
+
trimmed_candidate = strip_staging_suffix(stage_name)
|
|
4473
|
+
if trimmed_candidate and trimmed_candidate not in candidates:
|
|
4474
|
+
candidates.append(trimmed_candidate)
|
|
4475
|
+
|
|
4476
|
+
matched_name = None
|
|
4477
|
+
for candidate in candidates:
|
|
4478
|
+
matched_name = consume_name(candidate)
|
|
4479
|
+
if matched_name:
|
|
4480
|
+
break
|
|
4481
|
+
|
|
4482
|
+
if matched_name:
|
|
4483
|
+
media.metadata["photos_returned_name"] = matched_name
|
|
4484
|
+
imported_media.append(media)
|
|
4485
|
+
matched_media_ids.add(id(media))
|
|
4486
|
+
else:
|
|
4487
|
+
skipped_media.append(media)
|
|
4488
|
+
|
|
4489
|
+
leftover_imported = list(imported_counter.elements())
|
|
4490
|
+
|
|
4491
|
+
if leftover_imported:
|
|
4492
|
+
LOG.warning(
|
|
4493
|
+
"Photos returned %d filename(s) that did not match staged files; first entries: %s",
|
|
4494
|
+
len(leftover_imported),
|
|
4495
|
+
leftover_imported[:5],
|
|
4496
|
+
)
|
|
4497
|
+
|
|
4498
|
+
if skipped_media:
|
|
4499
|
+
LOG.warning("Photos did not report %d staged file(s); treating them as skipped.", len(skipped_media))
|
|
4500
|
+
rejection_parent = _log_directory() or staging_dir.parent
|
|
4501
|
+
rejection_parent.mkdir(parents=True, exist_ok=True)
|
|
4502
|
+
rejection_path = rejection_parent / f"Photos_rejections_{dt.datetime.now().strftime('%Y%m%d%H%M%S')}.txt"
|
|
4503
|
+
with rejection_path.open("w", encoding="utf-8") as rejection_handle:
|
|
4504
|
+
rejection_handle.write("FILES REJECTED OR MISSING FROM PHOTOS IMPORT\n")
|
|
4505
|
+
rejection_handle.write("=" * 80 + "\n")
|
|
4506
|
+
for media in skipped_media:
|
|
4507
|
+
stage_name = media.stage_path.name if media.stage_path else "<missing>"
|
|
4508
|
+
original_source = media.metadata.get("original_source") or str(media.source)
|
|
4509
|
+
rejection_handle.write(f"Staged: {stage_name}\tOriginal: {original_source}\n")
|
|
4510
|
+
if leftover_imported:
|
|
4511
|
+
rejection_handle.write("\nFILENAMES RETURNED BY PHOTOS WITH NO MATCH\n")
|
|
4512
|
+
rejection_handle.write("=" * 80 + "\n")
|
|
4513
|
+
for name in leftover_imported:
|
|
4514
|
+
rejection_handle.write(f"{name}\n")
|
|
4515
|
+
LOG.info("Photos rejection details written to %s", rejection_path)
|
|
4516
|
+
else:
|
|
4517
|
+
LOG.info("All %d staged file(s) reported by Photos.", len(imported_media))
|
|
4518
|
+
|
|
4519
|
+
imported_count = len(imported_media)
|
|
4520
|
+
skipped_count = len(skipped_media)
|
|
4521
|
+
|
|
4522
|
+
LOG.info(
|
|
4523
|
+
"Folder import complete: %d imported, %d skipped (duplicates or rejected by Photos)",
|
|
4524
|
+
imported_count,
|
|
4525
|
+
skipped_count,
|
|
4526
|
+
)
|
|
4527
|
+
|
|
4528
|
+
return imported_count, skipped_count, skipped_media
|
|
4529
|
+
|
|
4530
|
+
|
|
4531
|
+
def prompt_retry_failed_imports() -> bool:
    """Ask the user whether failed Apple Photos imports should be retried.

    Returns:
        True when the user answers yes, False on a "no" answer or when the
        prompt is interrupted (Ctrl+C / EOF).
    """
    affirmative = ("y", "yes")
    negative = ("n", "no")
    while True:
        try:
            answer = input("\nWould you like to retry importing the failed files? (y/n): ").strip().lower()
        except (KeyboardInterrupt, EOFError):
            # Treat an interrupted prompt as a declined retry.
            print("\nNo retry.")
            return False
        if answer in affirmative:
            return True
        if answer in negative:
            return False
        print("Please enter 'y' or 'n'.")
|
|
4545
|
+
|
|
4546
|
+
|
|
4547
|
+
def confirm_scan(root: Path, output_dir: Path, assume_yes: bool) -> bool:
    """Prompt for confirmation before scanning and staging begin.

    Args:
        root: directory or file that will be scanned.
        output_dir: directory that will receive staging folders and logs.
        assume_yes: when True, skip the interactive prompt entirely.

    Returns:
        True when the run should proceed, False when the user aborted.
    """
    if assume_yes:
        return True

    print("\nAbout to scan and import media with Smart Media Manager")
    print(f" Scan root: {root}")
    print(f" Logs/staging will be created under: {output_dir}")
    print("Press Enter to continue or 'n' to abort.")

    try:
        answer = input("Proceed? [Y/n]: ").strip().lower()
    except (KeyboardInterrupt, EOFError):
        print("\nAborted by user.")
        return False

    # An empty answer defaults to "yes".
    if answer not in ("", "y", "yes"):
        print("Aborted by user.")
        return False
    return True
|
|
4574
|
+
|
|
4575
|
+
|
|
4576
|
+
def cleanup_staging(staging: Path) -> None:
    """Delete the staging folder and everything inside it, if it exists."""
    if not staging.exists():
        return
    LOG.debug("Deleting staging folder %s", staging)
    shutil.rmtree(staging)
|
|
4580
|
+
|
|
4581
|
+
|
|
4582
|
+
def configure_logging() -> None:
    """Reset the package logger: INFO level, single warnings-only console handler."""
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.WARNING)
    console_handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
    # Drop any previously attached handlers so repeated calls stay idempotent.
    LOG.setLevel(logging.INFO)
    LOG.handlers.clear()
    LOG.addHandler(console_handler)
|
|
4589
|
+
|
|
4590
|
+
|
|
4591
|
+
def attach_file_logger(root: Path, run_ts: str) -> Path:
    """Create a timestamped log directory in CWD and attach a file logger.

    Args:
        root: Scan root directory (not used for the log location; the
            parameter is kept for compatibility with existing callers)
        run_ts: Timestamp string for this run

    Returns:
        Path to the created log file

    Note:
        The log directory is created in the current working directory (not
        the scan root) with the pattern
        ``.smm__runtime_logs_YYYYMMDD_HHMMSS_<uuid>``, which prevents the
        logs from being scanned as media files.
    """
    global _FILE_LOG_HANDLER

    # Idempotent: a second call reuses the handler attached earlier.
    if _FILE_LOG_HANDLER is not None:
        return Path(_FILE_LOG_HANDLER.baseFilename)  # type: ignore[attr-defined]

    # A short UUID suffix keeps concurrent runs from colliding on the
    # same timestamp.
    unique_suffix = str(uuid.uuid4())[:8]
    log_dir = Path.cwd() / f"{SMM_LOGS_SUBDIR}{run_ts}_{unique_suffix}"
    log_dir.mkdir(parents=True, exist_ok=True)

    # The log file lives inside the freshly created timestamped directory.
    log_file = log_dir / f"smm_run_{run_ts}.log"
    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
    LOG.addHandler(file_handler)
    _FILE_LOG_HANDLER = file_handler
    return log_file
|
|
4625
|
+
|
|
4626
|
+
|
|
4627
|
+
def validate_root(path: Path, allow_file: bool = False) -> Path:
    """Expand and resolve *path*, verifying it is a usable scan target.

    Args:
        path: candidate scan root.
        allow_file: when True, a regular file is also accepted.

    Returns:
        The fully resolved path.

    Raises:
        RuntimeError: if the path does not exist, or is not a directory
            (nor an allowed file).
    """
    resolved = path.expanduser().resolve()
    if not resolved.exists():
        raise RuntimeError(f"Path does not exist: {resolved}")
    acceptable = resolved.is_dir() or (allow_file and resolved.is_file())
    if not acceptable:
        raise RuntimeError(f"Path must be a {'file or ' if allow_file else ''}directory: {resolved}")
    return resolved
|
|
4634
|
+
|
|
4635
|
+
|
|
4636
|
+
def main() -> int:
    """CLI entry point: scan, stage, and import media into Apple Photos.

    Pipeline: configure logging, parse arguments, bootstrap dependencies,
    scan the target path for media, stage files (converting incompatible
    ones), import the staging folder into Apple Photos, optionally retry
    failed imports, then report statistics and clean up.

    Returns:
        Process exit status: 0 on success/no-op, 1 on error,
        130 on Ctrl+C (128 + SIGINT=2).
    """
    configure_logging()
    args = parse_args()
    LOG.info("smart-media-manager %s", __version__)
    # Bootstrap can be disabled either by CLI flag or environment variable.
    skip_bootstrap = args.skip_bootstrap or bool(os.environ.get("SMART_MEDIA_MANAGER_SKIP_BOOTSTRAP"))
    if skip_bootstrap:
        LOG.debug("Skipping dependency bootstrap (manual mode).")
    else:
        ensure_system_dependencies()
    media_files: list[MediaFile] = []
    staging_root: Optional[Path] = None
    skip_log: Optional[Path] = None
    skip_logger: Optional[SkipLogger] = None
    stats = RunStatistics()
    try:
        # Auto-detect if path is a file or directory
        is_single_file = args.path.is_file()

        # Warn if --recursive is used with a single file (it will be ignored)
        if is_single_file and args.recursive:
            LOG.warning("--recursive flag ignored when processing a single file")
            print("Warning: --recursive flag ignored when processing a single file")

        root = validate_root(args.path, allow_file=is_single_file)
        run_ts = timestamp()

        # For single file mode, use parent directory for outputs; otherwise use scan root
        output_dir = root.parent if is_single_file else root

        if not confirm_scan(root, output_dir, args.assume_yes):
            return 0

        # Check write permissions for both CWD (logs) and output_dir (skip logs, staging)
        try:
            check_write_permission(Path.cwd(), "create logs")
        except (PermissionError, OSError) as e:
            print(f"ERROR: {e}", file=sys.stderr)
            return 1

        try:
            check_write_permission(output_dir, "create skip logs and staging directory")
        except (PermissionError, OSError) as e:
            print(f"ERROR: {e}", file=sys.stderr)
            return 1

        log_path = attach_file_logger(root, run_ts)  # root arg kept for compatibility, not used for log location
        configure_pillow_max_image_pixels(args.max_image_pixels)

        # External tools required for probing/converting and Photos scripting.
        for dependency in ("ffprobe", "ffmpeg", "osascript"):
            ensure_dependency(dependency)
        LOG.info("Scanning %s for media files...", root)
        print(f"Scanning {root}...")

        # Skip log goes in output directory (scan root or parent of single file)
        skip_log = output_dir / f"smm_skipped_files_{run_ts}.log"
        if skip_log.exists():
            skip_log.unlink()
        skip_logger = SkipLogger(skip_log)

        # Handle single file mode
        if is_single_file:
            media, reject_reason = detect_media(root, args.skip_compatibility_check)
            if media:
                media_files = [media]
                stats.total_files_scanned = 1
                stats.total_binary_files = 1
                stats.total_media_detected = 1
                if media.compatible:
                    stats.media_compatible = 1
                else:
                    stats.media_incompatible = 1
            elif reject_reason:
                skip_logger.log(root, reject_reason)
                LOG.debug("File rejected: %s", reject_reason)
                return 0
            else:
                LOG.debug("File is not a supported media format.")
                return 0
        else:
            media_files = gather_media_files(
                root,
                args.recursive,
                args.follow_symlinks,
                skip_logger,
                stats,
                args.skip_compatibility_check,
            )
        if not media_files:
            LOG.warning("No media files detected.")
            # Remove an empty skip log so no stray file is left behind.
            if skip_logger and not skip_logger.has_entries() and skip_log.exists():
                skip_log.unlink()
            return 0
        ensure_raw_dependencies_for_files(media_files)

        # Create staging directory in output directory (scan root or parent of single file)
        staging_root = output_dir / f"FOUND_MEDIA_FILES_{run_ts}"
        staging_root.mkdir(parents=True, exist_ok=False)

        # Create originals directory OUTSIDE staging folder (sibling directory)
        # CRITICAL: Must NOT be inside staging_root or Photos will try to import incompatible original files!
        originals_root = output_dir / f"ORIGINALS_{run_ts}"

        move_to_staging(media_files, staging_root, originals_root, copy_files=args.copy_mode)
        ensure_compatibility(media_files, skip_logger, stats, args.skip_convert)
        # No sanitization needed - sequential suffix already ensures uniqueness
        update_stats_after_compatibility(stats, media_files)

        missing_media: list[MediaFile] = [media for media in media_files if not media.stage_path or not media.stage_path.exists()]

        if missing_media:
            # Abort loudly: staged files vanishing indicates external interference.
            missing_listing = ", ".join(str((m.stage_path or m.source)) for m in missing_media[:5])
            raise RuntimeError(f"Missing staged file(s): {missing_listing}")

        staged_count = len(media_files)
        LOG.info("Preparing to import %d staged file(s) into Apple Photos", staged_count)
        print(f"\nStaging completed: {staged_count} file(s) ready for Photos import.")

        # NOTE(review): update_stats_after_compatibility was already called a
        # few lines above; this second call looks redundant — confirm intent.
        update_stats_after_compatibility(stats, media_files)
        stats.log_summary()
        stats.print_summary()

        LOG.info("Importing %d file(s) into Apple Photos via folder import...", staged_count)
        print(f"Importing {staged_count} file(s) into Apple Photos...")

        # DEBUG: Timestamp when folder import is about to be called
        current_timestamp = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
        LOG.debug("🚨 TIMESTAMP %s - The function 'import_folder_to_photos' was called now. No imports should have been attempted before this time!", current_timestamp)
        print(f"🚨 TIMESTAMP {current_timestamp} - Calling import_folder_to_photos NOW")

        # Single folder import replaces batch import - no timing dependencies
        # By default, Photos will check for duplicates and prompt the user
        imported_count, skipped_count, skipped_media = import_folder_to_photos(
            staging_dir=staging_root,
            media_files=media_files,
            album_name=args.album,
            skip_duplicates=args.skip_duplicate_check,
        )

        # Log skipped files (duplicates or rejected by Photos) and populate stats
        if skipped_media:
            for media in skipped_media:
                log_target = media.stage_path or media.metadata.get("original_source") or media.source
                skip_logger.log(Path(log_target), "Skipped by Photos (duplicate or incompatible format)")
                # Issue #3: Populate refused_filenames for enhanced error reporting
                stats.refused_filenames.append((Path(log_target), "Skipped by Photos (duplicate or incompatible format)"))
            LOG.warning("%d file(s) skipped by Photos (see skip log)", skipped_count)
            # Issue #3: Track refused count for statistics
            stats.refused_by_apple_photos = skipped_count

        # Update statistics
        stats.total_imported = imported_count
        for media in media_files:
            if media not in skipped_media:
                if media.was_converted:
                    stats.imported_after_conversion += 1
                else:
                    stats.imported_without_conversion += 1

        # Print statistics summary
        stats.print_summary()
        stats.log_summary()

        # Issue #2: Prompt user to retry failed imports
        if skipped_media and prompt_retry_failed_imports():
            LOG.info("Retrying import for %d failed file(s)...", len(skipped_media))
            print(f"\nRetrying import for {len(skipped_media)} file(s)...")

            # Create temporary retry staging folder with only skipped files
            retry_staging = staging_root.parent / f"RETRY_STAGING_{timestamp()}"
            retry_staging.mkdir(parents=True, exist_ok=True)

            # Move skipped files to retry staging
            retry_media: list[MediaFile] = []
            for media in skipped_media:
                if media.stage_path and media.stage_path.exists():
                    retry_dest = retry_staging / media.stage_path.name
                    shutil.move(str(media.stage_path), str(retry_dest))
                    media.stage_path = retry_dest
                    retry_media.append(media)

            if retry_media:
                # Retry import with only the failed files
                retry_imported, retry_skipped, retry_skipped_media = import_folder_to_photos(
                    staging_dir=retry_staging,
                    media_files=retry_media,
                    album_name=args.album,
                    skip_duplicates=args.skip_duplicate_check,
                )

                # Update statistics with retry results
                stats.total_imported += retry_imported
                stats.refused_by_apple_photos = len(retry_skipped_media)

                # Update refused_filenames with final failures
                stats.refused_filenames.clear()
                for media in retry_skipped_media:
                    log_target = media.stage_path or media.metadata.get("original_source") or media.source
                    stats.refused_filenames.append((Path(log_target), "Failed after retry"))
                    skip_logger.log(Path(log_target), "Failed after retry")

                # NOTE(review): this summary block uses retry_imported /
                # retry_skipped_media and therefore must stay inside the
                # `if retry_media:` suite, or it would raise NameError when
                # retry_media is empty — confirm against original nesting.
                LOG.info("Retry complete: %d imported, %d still failed", retry_imported, len(retry_skipped_media))
                print(f"Retry complete: {retry_imported} imported, {len(retry_skipped_media)} still failed")

                # Reprint final statistics
                stats.print_summary()
                stats.log_summary()

            # Clean up retry staging folder
            if retry_staging.exists():
                shutil.rmtree(retry_staging)

        LOG.info(
            "Successfully imported %d media file(s) into Apple Photos.",
            imported_count,
        )
        if args.delete:
            cleanup_staging(staging_root)
        else:
            LOG.debug("Staging folder retained at %s", staging_root)
        # Keep the skip log only when it actually recorded something.
        if skip_log and skip_log.exists():
            if skip_logger and skip_logger.has_entries():
                LOG.info("Skipped file log saved at %s", skip_log)
            else:
                skip_log.unlink()
        print(f"\nDetailed log: {log_path}")
        return 0
    except KeyboardInterrupt:
        # Graceful handling of Ctrl+C - save logs and exit cleanly
        LOG.warning("Operation interrupted by user (Ctrl+C)")
        print("\n\n" + "=" * 60)
        print("INTERRUPTED: Operation cancelled by user (Ctrl+C)")
        print("=" * 60)
        # Save skip log if it has entries
        if skip_log and skip_log.exists():
            if skip_logger and skip_logger.has_entries():
                LOG.info("Skipped file log saved at %s", skip_log)
                print(f"Skip log saved: {skip_log}")
            else:
                skip_log.unlink()
        # Point to detailed log
        if "log_path" in locals():
            LOG.info("Detailed log saved at %s", log_path)
            print(f"Detailed log: {log_path}")
        # Preserve staging folder for potential resume - don't revert
        if staging_root and staging_root.exists():
            print(f"Staging folder preserved: {staging_root}")
            print("(Files can be manually imported or removed)")
        print("=" * 60)
        return 130  # Standard exit code for Ctrl+C (128 + SIGINT=2)
    except Exception as exc:  # noqa: BLE001
        LOG.error("Error: %s", exc)
        # Best-effort rollback: restore staged files to their sources.
        revert_media_files(media_files, staging_root)
        if skip_log and skip_log.exists():
            if skip_logger and skip_logger.has_entries():
                LOG.info("Skipped file log saved at %s", skip_log)
            else:
                skip_log.unlink()
        if "log_path" in locals():
            print(f"See detailed log: {log_path}")
        return 1
    finally:
        # Persist any newly discovered format mappings regardless of outcome.
        if UNKNOWN_MAPPINGS.has_entries():
            updates_path = UNKNOWN_MAPPINGS.write_updates(Path.cwd())
            if updates_path:
                print(f"Unknown format mappings saved to {updates_path}")
|
|
4901
|
+
|
|
4902
|
+
|
|
4903
|
+
def run() -> None:
    """Console-script entry point: execute main() and exit with its status."""
    raise SystemExit(main())
|
|
4905
|
+
|
|
4906
|
+
|
|
4907
|
+
class ProgressReporter:
    """Single-line console progress display.

    With a known total a bar, percentage and ETA are drawn; when the total
    is zero ("dynamic" mode) only a running count of processed items is
    shown.
    """

    def __init__(self, total: int, label: str) -> None:
        self.total = max(total, 0)
        self.label = label
        self.start = time.time()
        self.completed = 0
        self.last_render = 0.0
        # Unknown total -> count-only output, no bar/ETA.
        self.dynamic = self.total == 0

    def update(self, step: int = 1, force: bool = False) -> None:
        """Advance the counter by *step* and (rate-limited) redraw the line."""
        self.completed += step
        now = time.time()
        # Throttle redraws to 10/s, except when forced, in dynamic mode,
        # or once the bar has reached its total.
        recently_drawn = now - self.last_render < 0.1
        still_running = not self.dynamic and self.completed < self.total
        if not force and recently_drawn and still_running:
            return
        self.last_render = now
        if self.dynamic:
            sys.stdout.write(f"\r{self.label}: processed {self.completed}")
        else:
            fraction = min(self.completed / self.total if self.total else 1.0, 1.0)
            elapsed = now - self.start
            rate = self.completed / elapsed if elapsed > 0 else 0
            remaining = (self.total - self.completed) / rate if rate > 0 else float("inf")
            bar_len = 30
            filled = int(bar_len * fraction)
            bar = "#" * filled + "-" * (bar_len - filled)
            eta = "--:--" if remaining == float("inf") else time.strftime("%M:%S", time.gmtime(int(remaining)))
            sys.stdout.write(f"\r{self.label}: [{bar}] {fraction * 100:5.1f}% ETA {eta}")
        sys.stdout.flush()

    def finish(self) -> None:
        """Snap the display to 100% (known total) and terminate the line."""
        if not self.dynamic:
            self.completed = self.total
        self.update(step=0, force=True)
        sys.stdout.write("\n")
        sys.stdout.flush()
|