absfuyu 5.0.0__py3-none-any.whl → 6.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of absfuyu might be problematic. Click here for more details.

Files changed (103) hide show
  1. absfuyu/__init__.py +5 -3
  2. absfuyu/__main__.py +3 -3
  3. absfuyu/cli/__init__.py +13 -2
  4. absfuyu/cli/audio_group.py +98 -0
  5. absfuyu/cli/color.py +30 -14
  6. absfuyu/cli/config_group.py +9 -2
  7. absfuyu/cli/do_group.py +23 -6
  8. absfuyu/cli/game_group.py +27 -2
  9. absfuyu/cli/tool_group.py +81 -11
  10. absfuyu/config/__init__.py +3 -3
  11. absfuyu/core/__init__.py +12 -8
  12. absfuyu/core/baseclass.py +929 -96
  13. absfuyu/core/baseclass2.py +44 -3
  14. absfuyu/core/decorator.py +70 -4
  15. absfuyu/core/docstring.py +64 -41
  16. absfuyu/core/dummy_cli.py +3 -3
  17. absfuyu/core/dummy_func.py +19 -6
  18. absfuyu/dxt/__init__.py +2 -2
  19. absfuyu/dxt/base_type.py +93 -0
  20. absfuyu/dxt/dictext.py +204 -16
  21. absfuyu/dxt/dxt_support.py +2 -2
  22. absfuyu/dxt/intext.py +151 -34
  23. absfuyu/dxt/listext.py +969 -127
  24. absfuyu/dxt/strext.py +77 -17
  25. absfuyu/extra/__init__.py +2 -2
  26. absfuyu/extra/audio/__init__.py +8 -0
  27. absfuyu/extra/audio/_util.py +57 -0
  28. absfuyu/extra/audio/convert.py +192 -0
  29. absfuyu/extra/audio/lossless.py +281 -0
  30. absfuyu/extra/beautiful.py +3 -2
  31. absfuyu/extra/da/__init__.py +72 -0
  32. absfuyu/extra/da/dadf.py +1600 -0
  33. absfuyu/extra/da/dadf_base.py +186 -0
  34. absfuyu/extra/da/df_func.py +181 -0
  35. absfuyu/extra/da/mplt.py +219 -0
  36. absfuyu/extra/ggapi/__init__.py +8 -0
  37. absfuyu/extra/ggapi/gdrive.py +223 -0
  38. absfuyu/extra/ggapi/glicense.py +148 -0
  39. absfuyu/extra/ggapi/glicense_df.py +186 -0
  40. absfuyu/extra/ggapi/gsheet.py +88 -0
  41. absfuyu/extra/img/__init__.py +30 -0
  42. absfuyu/extra/img/converter.py +402 -0
  43. absfuyu/extra/img/dup_check.py +291 -0
  44. absfuyu/extra/pdf.py +87 -0
  45. absfuyu/extra/rclone.py +253 -0
  46. absfuyu/extra/xml.py +90 -0
  47. absfuyu/fun/__init__.py +7 -20
  48. absfuyu/fun/rubik.py +442 -0
  49. absfuyu/fun/tarot.py +2 -2
  50. absfuyu/game/__init__.py +2 -2
  51. absfuyu/game/game_stat.py +2 -2
  52. absfuyu/game/schulte.py +78 -0
  53. absfuyu/game/sudoku.py +2 -2
  54. absfuyu/game/tictactoe.py +2 -3
  55. absfuyu/game/wordle.py +6 -4
  56. absfuyu/general/__init__.py +4 -4
  57. absfuyu/general/content.py +4 -4
  58. absfuyu/general/human.py +2 -2
  59. absfuyu/general/resrel.py +213 -0
  60. absfuyu/general/shape.py +3 -8
  61. absfuyu/general/tax.py +344 -0
  62. absfuyu/logger.py +806 -59
  63. absfuyu/numbers/__init__.py +13 -0
  64. absfuyu/numbers/number_to_word.py +321 -0
  65. absfuyu/numbers/shorten_number.py +303 -0
  66. absfuyu/numbers/time_duration.py +217 -0
  67. absfuyu/pkg_data/__init__.py +2 -2
  68. absfuyu/pkg_data/deprecated.py +2 -2
  69. absfuyu/pkg_data/logo.py +1462 -0
  70. absfuyu/sort.py +4 -4
  71. absfuyu/tools/__init__.py +28 -2
  72. absfuyu/tools/checksum.py +144 -9
  73. absfuyu/tools/converter.py +120 -34
  74. absfuyu/tools/generator.py +461 -0
  75. absfuyu/tools/inspector.py +752 -0
  76. absfuyu/tools/keygen.py +2 -2
  77. absfuyu/tools/obfuscator.py +47 -9
  78. absfuyu/tools/passwordlib.py +89 -25
  79. absfuyu/tools/shutdownizer.py +3 -8
  80. absfuyu/tools/sw.py +718 -0
  81. absfuyu/tools/web.py +10 -13
  82. absfuyu/typings.py +138 -0
  83. absfuyu/util/__init__.py +114 -6
  84. absfuyu/util/api.py +41 -18
  85. absfuyu/util/cli.py +119 -0
  86. absfuyu/util/gui.py +91 -0
  87. absfuyu/util/json_method.py +43 -14
  88. absfuyu/util/lunar.py +2 -2
  89. absfuyu/util/package.py +124 -0
  90. absfuyu/util/path.py +702 -82
  91. absfuyu/util/performance.py +122 -7
  92. absfuyu/util/shorten_number.py +244 -21
  93. absfuyu/util/text_table.py +481 -0
  94. absfuyu/util/zipped.py +8 -7
  95. absfuyu/version.py +79 -59
  96. {absfuyu-5.0.0.dist-info → absfuyu-6.1.2.dist-info}/METADATA +52 -11
  97. absfuyu-6.1.2.dist-info/RECORD +105 -0
  98. {absfuyu-5.0.0.dist-info → absfuyu-6.1.2.dist-info}/WHEEL +1 -1
  99. absfuyu/extra/data_analysis.py +0 -1078
  100. absfuyu/general/generator.py +0 -303
  101. absfuyu-5.0.0.dist-info/RECORD +0 -68
  102. {absfuyu-5.0.0.dist-info → absfuyu-6.1.2.dist-info}/entry_points.txt +0 -0
  103. {absfuyu-5.0.0.dist-info → absfuyu-6.1.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,402 @@
1
+ """
2
+ Absfuyu: Picture converter
3
+ --------------------------
4
+ Image converter
5
+
6
+
7
+ Version: 6.1.1
8
+ Date updated: 30/12/2025 (dd/mm/yyyy)
9
+ """
10
+
11
+ # Module level
12
+ # ---------------------------------------------------------------------------
13
+ __all__ = ["ImgConverter"]
14
+
15
+
16
+ # Library
17
+ # ---------------------------------------------------------------------------
18
+ import logging
19
+ import shutil
20
+ from functools import partial
21
+ from importlib.util import find_spec as check_for_package_installed
22
+ from pathlib import Path
23
+ from typing import Any, Literal, Protocol
24
+
25
+ from absfuyu.core.dummy_func import tqdm as tqdm_base
26
+ from absfuyu.util.path import DirectorySelectMixin
27
+
28
+ try:
29
+ from PIL import Image
30
+ from PIL import features as pil_features
31
+ from PIL.ImageFile import ImageFile
32
+ except ImportError:
33
+ from subprocess import run
34
+
35
+ from absfuyu.config import ABSFUYU_CONFIG
36
+
37
+ if ABSFUYU_CONFIG._get_setting("auto-install-extra").value: # type: ignore
38
+ cmd = "python -m pip install -U absfuyu[pic]".split()
39
+ run(cmd)
40
+ else:
41
+ raise SystemExit("This feature is in absfuyu[pic] package") # noqa: B904
42
+ else:
43
+ from PIL import Image
44
+ from PIL import features as pil_features
45
+ from PIL.ImageFile import ImageFile
46
+
47
+ try:
48
+ from pillow_heif import register_heif_opener # type: ignore
49
+ except ImportError:
50
+ from absfuyu.core.dummy_func import dummy_function as register_heif_opener
51
+ register_heif_opener()
52
+
53
+
54
+ # Setup
55
+ # ---------------------------------------------------------------------------
56
+ tqdm = partial(tqdm_base, unit_scale=True, dynamic_ncols=True)
57
+ SupportedImageFormat = Literal[".jpg", ".jpeg", ".png", ".webp"]
58
+
59
+
60
+ # Exporter/Converter
61
+ # ---------------------------------------------------------------------------
62
class SupportImageConverter(Protocol):
    """
    Structural type for image converter callables

    Any callable that takes an opened image, a destination path and
    optional keyword parameters, and returns ``None``, satisfies this protocol.
    """

    # Callable[[ImageFile, Path], None]
    def __call__(self, image: ImageFile, path: Path, **params: Any) -> None: ...
65
+
66
+
67
def _image_convert_default(image: ImageFile, path: Path, **params: Any) -> None:
    """
    Fallback image exporter (no format-specific settings)

    Parameters
    ----------
    image : ImageFile
        Image to write out

    path : Path
        Destination file path

    **params : Any
        Extra keyword arguments forwarded unchanged to ``image.save``
    """
    save_image = image.save
    save_image(path, **params)
80
+
81
+
82
def _image_convert_webp(image: ImageFile, path: Path, **params: Any) -> None:
    """
    Export an image as lossless ``.webp``

    Parameters
    ----------
    image : ImageFile
        Image to write out

    path : Path
        Destination file path

    **params : Any
        Extra keyword arguments forwarded to ``image.save``
        (must not repeat any of the fixed settings below)
    """
    webp_settings: dict[str, Any] = {
        "format": "WEBP",
        "lossless": True,
        "quality": 100,
        "alpha_quality": 100,
        "method": 4,
        # If true, preserve the transparent RGB values. Otherwise, discard
        # invisible RGB values for better compression. Defaults to false.
        "exact": False,
    }
    # Double unpacking still raises TypeError on a repeated keyword,
    # exactly like spelling the keywords out in the call.
    image.save(path, **webp_settings, **params)
104
+
105
+
106
def _image_convert_png(image: ImageFile, path: Path, **params: Any) -> None:
    """
    Export an image as ``.png`` with a fixed compression level

    Parameters
    ----------
    image : ImageFile
        Image to write out

    path : Path
        Destination file path

    **params : Any
        Extra keyword arguments forwarded to ``image.save``
        (must not repeat any of the fixed settings below)
    """
    # ``optimize`` is left off, as in the original settings
    png_settings: dict[str, Any] = {
        "format": "PNG",
        "compress_level": 6,
    }
    # Double unpacking still raises TypeError on a repeated keyword
    image.save(path, **png_settings, **params)
125
+
126
+
127
def _image_convert_jpg(image: ImageFile, path: Path, **params: Any) -> None:
    """
    Convert image to .jpg format (with custom settings)

    JPEG has no alpha channel, so any transparency is flattened onto a
    white background before saving.

    Parameters
    ----------
    image : ImageFile
        Image file

    path : Path
        Path to export

    **params : Any
        Extra keyword arguments forwarded to ``image.save``
    """
    # Transparency can live in an alpha band (RGBA/LA/PA) or in the
    # ``transparency`` info entry (palette / tRNS images). Checking only
    # for "RGBA" would drop the alpha of the other cases without
    # compositing them onto white.
    has_alpha = image.mode in ("RGBA", "LA", "PA") or "transparency" in image.info

    if has_alpha:
        rgba = image.convert("RGBA")
        white_background = Image.new("RGB", rgba.size, (255, 255, 255))
        white_background.paste(rgba, mask=rgba.getchannel("A"))
        image = white_background
    else:
        image = image.convert("RGB")

    image.save(
        path,
        format="JPEG",
        optimize=True,
        keep_rgb=True,
        **params,
    )
154
+
155
+
156
+ # Class
157
+ # ---------------------------------------------------------------------------
158
class ImgConverter(DirectorySelectMixin):
    """
    Convert every supported image inside a directory to another format

    The set of usable formats is detected at construction time from the
    installed Pillow features (and ``pillow_heif`` when available).
    Format-specific save functions are looked up in ``_IMAGE_CONVERTER``
    and can be replaced with :meth:`add_converter`.
    """

    # Suffix -> save function. Shared by all instances (class level).
    _IMAGE_CONVERTER: dict[str, SupportImageConverter] = {
        "default": _image_convert_default,
        ".webp": _image_convert_webp,
        ".png": _image_convert_png,
        ".jpg": _image_convert_jpg,
        ".jpeg": _image_convert_jpg,
    }

    def __init__(
        self,
        source_path: str | Path,
        create_if_not_exist: bool = False,
        backup_dir_name: str | None = None,
        logger: logging.Logger | None = None,
    ) -> None:
        """
        Parameters
        ----------
        source_path : str | Path
            Directory that holds the images

        create_if_not_exist : bool, optional
            Forwarded to the parent initializer, by default ``False``

        backup_dir_name : str | None, optional
            Name of the backup folder created inside ``source_path``,
            by default ``None`` (``"ZZZ_Backup"``)

        logger : logging.Logger | None, optional
            Logger instance (stored only, not used yet), by default ``None``
        """
        super().__init__(source_path, create_if_not_exist)

        # Supported image extension
        self._supported_image_format = [".png"]
        self._register_img_format()

        # Backup
        if backup_dir_name is None:
            backup_dir_name = "ZZZ_Backup"
        self.backup_path = self.source_path.joinpath(backup_dir_name)

        # Not available yet
        self.logger = logger

    # Extra format
    @classmethod
    def install_all_extension(cls) -> None:
        """
        Install all extra package to unlock all features
        """
        import subprocess
        import sys

        extra = [
            "pillow_heif",  # heic support
            "defusedxml",  # xmp
            "olefile",  # FPX and MIC images
        ]
        # Run pip through the current interpreter so the packages land in
        # the environment this code is running in, not whichever ``pip``
        # happens to be first on PATH.
        subprocess.run([sys.executable, "-m", "pip", "install", "-U", *extra])

    def _register_img_format(self) -> None:
        """
        Try to register these format:
        - ``.jpg``, ``.jpeg``
        - ``.webp``
        - ``.heif``, ``.heic`` (``pillow_heif`` package required)
        """
        if pil_features.check("jpg"):
            self._supported_image_format.extend([".jpg", ".jpeg"])
        if pil_features.check("webp"):
            self._supported_image_format.append(".webp")

        if check_for_package_installed("pillow_heif", "pillow_heif") is not None:
            self._supported_image_format.extend([".heic", ".heif"])

    @property
    def supported_image_format(self) -> list[str]:
        """
        Supported image format

        Returns
        -------
        list[str]
            Supported image format
        """
        return self._supported_image_format

    @classmethod
    def add_converter(cls, format_name: str, converter_func: SupportImageConverter) -> None:
        """
        Add image converter function to a format

        The registry is stored on the class, so the change is visible to
        every instance.

        Parameters
        ----------
        format_name : str
            Image format name (suffix with leading dot, e.g. ``".png"``)

        converter_func : SupportImageConverter
            Converter function


        Example:
        --------
        >>> ImgConverter.add_converter(".png", convert_to_png)
        """
        cls._IMAGE_CONVERTER[format_name] = converter_func

    # Support
    def _make_suffix_selection(self, exclude_suffix: str) -> tuple[str, ...]:
        """
        Make suffix selection (exclude the image with converted to suffix)

        Parameters
        ----------
        exclude_suffix : str
            Converted to suffix (compared case-insensitively)

        Returns
        -------
        tuple[str, ...]
            Suffix selection
        """
        excluded = exclude_suffix.lower()
        return tuple(x for x in self._supported_image_format if x.lower() != excluded)

    def _make_backup(self, src_file: Path) -> None:
        """
        Move ``src_file`` into the backup directory (same file name)

        Parameters
        ----------
        src_file : Path
            File to move
        """
        dest = self.backup_path.joinpath(src_file.name)
        shutil.move(src_file, dest)

    # Convert
    def _image_convert_legacy(
        self,
        path: Path,
        to_format: SupportedImageFormat | None = None,
        lossless: bool = True,
        compression_level: int | None = None,
    ) -> None:
        """
        Convert image to other format (settings are mostly for .webp format)

        Parameters
        ----------
        path : Path
            Path to image

        to_format : SupportedImageFormat | None, optional
            New image format, by default None

        lossless : bool, optional
            Lossless compression, by default True

        compression_level : int | None, optional
            Compression level, by default None
        """
        new_suffix = path.suffix if to_format is None else to_format

        # Pillow's format name for ".jpg" files is "JPEG"; "JPG" is not a
        # registered format name.
        format_name = new_suffix[1:].upper()
        if format_name == "JPG":
            format_name = "JPEG"

        # Context manager guarantees the source file handle is released
        with Image.open(path) as image:
            # Extract metadata
            xmp = image.info.get("xmp")
            exif = image.getexif()
            icc_profile = image.info.get("icc_profile")

            # Save
            image.save(
                path.with_suffix(new_suffix),
                format=format_name,
                lossless=lossless,
                quality=100,
                alpha_quality=100,
                method=(4 if compression_level is None else compression_level),
                # If true, preserve the transparent RGB values. Otherwise, discard
                # invisible RGB values for better compression. Defaults to false.
                exact=False,
                exif=exif,
                icc_profile=icc_profile,
                xmp=xmp,
            )

    def _image_convert(
        self,
        path: Path,
        to_format: str | None = None,
    ) -> None:
        """
        Convert image to other format

        Parameters
        ----------
        path : Path
            Path to image

        to_format : str | None, optional
            New image format (suffix with leading dot), by default None
            (keep the current suffix)
        """
        new_suffix = path.suffix if to_format is None else to_format

        # Pick the save function: format-specific first, then the registered
        # "default" entry, then the built-in default as a last resort.
        convert_func = self._IMAGE_CONVERTER.get(new_suffix)
        if convert_func is None:
            convert_func = self._IMAGE_CONVERTER.get("default", _image_convert_default)

        # Close the source file as soon as the conversion is done; a handle
        # left open can block the later backup move of the same file.
        with Image.open(path) as image:
            # Carry over only the metadata that is actually present
            save_kwargs: dict[str, Any] = {}
            if exif := image.getexif():
                save_kwargs["exif"] = exif
            if icc := image.info.get("icc_profile"):
                save_kwargs["icc_profile"] = icc
            if xmp := image.info.get("xmp"):
                save_kwargs["xmp"] = xmp

            # Save
            convert_func(image, path.with_suffix(new_suffix), **save_kwargs)

    def img_convert(self, to_format: SupportedImageFormat | str, backup: bool = True) -> None:
        """
        Convert images in directory to desired format

        Errors on individual images are printed and the loop continues
        with the next image.

        Parameters
        ----------
        to_format : SupportedImageFormat
            Format to convert

        backup : bool
            Move pictures to a backup folder

        Raises
        ------
        NotImplementedError
            Not supported image format
        """
        if to_format not in self._supported_image_format:
            raise NotImplementedError("Format not supported")

        imgs = self.select_all(*self._make_suffix_selection(to_format))

        for x in tqdm(imgs, desc=f"Converting to {to_format}"):
            try:
                self._image_convert(x, to_format=to_format)

                if backup:
                    # Created lazily so no empty backup folder appears
                    # when nothing was converted
                    self.backup_path.mkdir(parents=True, exist_ok=True)
                    self._make_backup(x)
            except TypeError as err:
                print(f" TYPE ERROR: {x} - {err}")
            except Exception as err:
                print(f" ERROR: {x} - {err}")
@@ -0,0 +1,291 @@
1
+ """
2
+ Absfuyu: Image duplicate checker
3
+ --------------------------------
4
+ Image duplicate checker
5
+
6
+
7
+ Version: 6.1.1
8
+ Date updated: 30/12/2025 (dd/mm/yyyy)
9
+ """
10
+
11
+ # Module level
12
+ # ---------------------------------------------------------------------------
13
+ __all__ = ["DirectoryRemoveDuplicateImageMixin"]
14
+
15
+
16
+ # Library
17
+ # ---------------------------------------------------------------------------
18
+ from collections.abc import Callable
19
+ from dataclasses import dataclass
20
+ from enum import StrEnum
21
+ from functools import partial, total_ordering
22
+ from pathlib import Path
23
+ from typing import Literal, NamedTuple
24
+
25
+ from absfuyu.core.dummy_func import tqdm as tqdm_base
26
+ from absfuyu.tools.checksum import DirectoryRemoveDuplicateMixin, DuplicateSummary
27
+
28
+ try:
29
+ import imagehash
30
+ from PIL import Image
31
+ except ImportError:
32
+ from subprocess import run
33
+
34
+ from absfuyu.config import ABSFUYU_CONFIG
35
+
36
+ if ABSFUYU_CONFIG._get_setting("auto-install-extra").value: # type: ignore
37
+ cmd = "python -m pip install -U absfuyu[pic]".split()
38
+ run(cmd)
39
+ else:
40
+ raise SystemExit("This feature is in absfuyu[pic] package") # noqa: B904
41
+
42
+ # Setup
43
+ # ---------------------------------------------------------------------------
44
+ tqdm = partial(tqdm_base, unit_scale=True, dynamic_ncols=True)
45
+ SupportedImageFormat = {".jpg", ".jpeg", ".png", ".webp", ".bmp"}
46
+
47
+
48
+ # Class
49
+ # ---------------------------------------------------------------------------
50
class HashMode(StrEnum):
    """
    Image hashing algorithm selector

    Each member is mapped to the matching ``imagehash`` function
    by ``DirectoryRemoveDuplicateImageMixin._get_img_hash_mode``.
    """

    PERCEPTUAL_HASH = "phash"  # imagehash.phash (also the fallback)
    AVERAGE_HASH = "ahash"  # imagehash.average_hash
    DIFFERENCE_HASH = "dhash"  # imagehash.dhash
    WAVELET_HASH = "whash"  # imagehash.whash
55
+
56
+
57
class DuplicateImgPair(NamedTuple):
    """
    Duplicate image pair

    Parameters
    ----------
    original : Path
        Original image path (first image seen with a matching hash)

    duplicate : Path
        Duplicate image path

    distant : int
        Distance between the two image hashes
        (0 means the hashes are identical)
    """

    original: Path
    duplicate: Path
    distant: int
76
+
77
+
78
@total_ordering
@dataclass
class ImageInfo:
    """
    Quick image info

    Comparison looks at ``dimension`` first and ``file_size`` second;
    ``path`` never takes part in equality or ordering.

    Parameters
    ----------
    path : Path
        Image path

    file_size : int
        File size

    dimension : tuple[int, int]
        Dimension (width, height)
    """

    path: Path
    file_size: int
    dimension: tuple[int, int]

    def __eq__(self, other: object) -> bool:
        # Returning NotImplemented (instead of raising) lets Python fall back
        # to the other operand, so ``info == 5`` is simply False.
        if not isinstance(other, self.__class__):
            return NotImplemented
        return self.dimension == other.dimension and self.file_size == other.file_size

    def __lt__(self, other: object) -> bool:
        # For foreign types Python turns NotImplemented into a TypeError
        if not isinstance(other, self.__class__):
            return NotImplemented

        # prioritize dimension first, then size
        if self.dimension != other.dimension:
            return self.dimension < other.dimension
        return self.file_size < other.file_size
113
+
114
+
115
class DirectoryRemoveDuplicateImageMixin(DirectoryRemoveDuplicateMixin):
    """
    Directory - Remove duplicate image

    - remove_duplicate_images


    Example:
    --------
    >>> DirectoryRemoveDuplicateImageMixin(".").remove_duplicate_images()
    """

    def __init__(self, source_path, create_if_not_exist=False) -> None:
        super().__init__(source_path, create_if_not_exist)

        # Filled by ``_gather_duplicate_image_cache`` with the
        # (original, duplicate, distance) pairs that were found
        self._duplicate_image_cache = None

    # Hash
    def _get_img_hash_mode(
        self, hash_mode: HashMode = HashMode.PERCEPTUAL_HASH
    ) -> Callable[..., imagehash.ImageHash]:
        """
        Get image hash mode

        Parameters
        ----------
        hash_mode : HashMode, optional
            Hash mode, by default ``HashMode.PERCEPTUAL_HASH``

        Returns
        -------
        Callable[..., imagehash.ImageHash]
            Hash function (``imagehash.phash`` for any unrecognized mode)
        """
        if hash_mode == HashMode.AVERAGE_HASH:
            return imagehash.average_hash
        if hash_mode == HashMode.DIFFERENCE_HASH:
            return imagehash.dhash
        if hash_mode == HashMode.WAVELET_HASH:
            return imagehash.whash
        return imagehash.phash

    def _gather_duplicate_image_cache(
        self, recursive: bool = True, threshold: int = 5, hash_mode: HashMode = HashMode.PERCEPTUAL_HASH
    ) -> None:
        """
        Gather duplicate image cache

        Results are stored in ``self._duplicate_cache`` (groups with more
        than one path) and ``self._duplicate_image_cache`` (pairs).

        Parameters
        ----------
        recursive : bool, optional
            Scan every file in the folder (including child folder), by default ``True``

        threshold : int, optional
            Maximum hamming distance between image hashes to consider them "similar", by default ``5``
            - 0: Exact image
            - [5,10]: Tolerant of light edits

        hash_mode : HashMode, optional
            Hash mode, by default ``HashMode.PERCEPTUAL_HASH``
        """
        valid = [
            x
            for x in self.source_path.glob("**/*" if recursive else "*")
            if x.is_file() and x.suffix.lower() in SupportedImageFormat
        ]
        hash_cache: dict[imagehash.ImageHash, list[Path]] = {}
        duplicates: list[DuplicateImgPair] = []

        # The hash function does not change between images: resolve it once
        hash_func = self._get_img_hash_mode(hash_mode=hash_mode)

        for x in tqdm(valid, desc="Hashing image..."):
            try:
                with Image.open(x) as img:
                    img_hash = hash_func(img)
            except Exception as err:
                # Unreadable image: report it and keep going
                print(f"ERROR: {x} - {err}")
                continue

            # Compare against all cached hashes; the image joins the first
            # group whose representative hash is within the threshold
            found = False
            for existing_hash, paths in hash_cache.items():
                distance = img_hash - existing_hash
                if distance <= threshold:
                    duplicates.append(DuplicateImgPair(paths[0], x, distance))
                    if x not in paths:
                        paths.append(x)
                    found = True
                    break

            if not found:
                hash_cache[img_hash] = [x]

        # Save to cache (only groups that actually contain duplicates)
        self._duplicate_cache = DuplicateSummary({k: v for k, v in hash_cache.items() if len(v) > 1})
        self._duplicate_image_cache = duplicates

    # Remove
    def _gather_img_info(self, image_path: Path) -> ImageInfo:
        """
        Read dimension and file size of an image

        Parameters
        ----------
        image_path : Path
            Image path

        Returns
        -------
        ImageInfo
            Path, file size and (width, height) of the image
        """
        with Image.open(image_path) as img:
            dim = img.size
        return ImageInfo(image_path, image_path.stat().st_size, dim)

    def _remove_duplicate_image_best(self, dry_run: bool = True, debug: bool = True) -> None:
        """
        Keep the best image of every duplicate group, delete the rest

        "Best" is the largest ``ImageInfo`` (dimension first, then file size).

        Parameters
        ----------
        dry_run : bool, optional
            Simulate only (no files deleted), by default ``True``

        debug : bool, optional
            Print each path that is (or would be) deleted, by default ``True``

        Raises
        ------
        ValueError
            Duplicate cache has not been gathered
        """
        if self._duplicate_cache is None or self._duplicate_image_cache is None:
            raise ValueError("No duplicates found")

        del_list: list[ImageInfo] = []
        for paths in self._duplicate_cache.values():
            # Ascending sort puts the best image last; everything before it goes
            ranked = sorted(self._gather_img_info(img) for img in paths)
            del_list.extend(ranked[:-1])

        for i, x in enumerate(del_list, start=1):
            if debug:
                print(f"{i:02}. Deleting {x.path}")
            if not dry_run:
                x.path.unlink(missing_ok=True)

    # Main
    def remove_duplicate_images(
        self,
        dry_run: bool = True,
        recursive: bool = True,
        threshold: int = 5,
        hash_mode: HashMode = HashMode.PERCEPTUAL_HASH,
        keep_mode: Literal["first", "last", "best"] = "best",
        debug: bool = True,
    ) -> None:
        """
        Remove duplicate images in a directory

        Failures during removal are reported on stdout instead of being
        raised, so the call itself stays best-effort.

        Parameters
        ----------
        dry_run : bool, optional
            Simulate only (no files deleted), by default ``True``

        recursive : bool, optional
            Scan every file in the folder (including child folder), by default ``True``

        threshold : int, optional
            Maximum hamming distance between image hashes to consider them "similar", by default ``5``
            - 0: Exact image
            - [5,10]: Tolerant of light edits

        hash_mode : HashMode, optional
            Hash mode, by default ``HashMode.PERCEPTUAL_HASH``

        keep_mode : Literal["first", "last", "best"], optional
            What to keep in duplicate images, by default ``"best"``
            - "first": First item in delete list
            - "last": Last item in delete list
            - "best": Best item (largest dimension and size) in delete list

        debug : bool, optional
            Debug message, by default ``True``
        """
        # Cache
        self._gather_duplicate_image_cache(recursive=recursive, threshold=threshold, hash_mode=hash_mode)

        # Remove
        try:
            if keep_mode in ["first", "last"]:
                summary = self._duplicate_cache
                print(f"Duplicate files: {summary.summary()}")
                summary.remove_duplicates(dry_run=dry_run, keep_first=keep_mode == "first", debug=debug)

            else:  # best mode
                self._remove_duplicate_image_best(dry_run=dry_run, debug=debug)

        except Exception as err:
            # Still best-effort, but no longer silent: a failed removal must
            # be visible to the user.
            print(f"ERROR: {err}")