pixmatch 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixmatch might be problematic. Click here for more details.
- pixmatch-0.0.1/LICENSE +19 -0
- pixmatch-0.0.1/PKG-INFO +93 -0
- pixmatch-0.0.1/README.md +71 -0
- pixmatch-0.0.1/pixmatch/__init__.py +444 -0
- pixmatch-0.0.1/pixmatch/__main__.py +48 -0
- pixmatch-0.0.1/pixmatch/gui/__init__.py +837 -0
- pixmatch-0.0.1/pixmatch/gui/pixmatch.ico +0 -0
- pixmatch-0.0.1/pixmatch/gui/utils.py +13 -0
- pixmatch-0.0.1/pixmatch/gui/widgets.py +656 -0
- pixmatch-0.0.1/pixmatch/gui/zip.png +0 -0
- pixmatch-0.0.1/pixmatch/utils.py +36 -0
- pixmatch-0.0.1/pixmatch.egg-info/PKG-INFO +93 -0
- pixmatch-0.0.1/pixmatch.egg-info/SOURCES.txt +16 -0
- pixmatch-0.0.1/pixmatch.egg-info/dependency_links.txt +1 -0
- pixmatch-0.0.1/pixmatch.egg-info/requires.txt +5 -0
- pixmatch-0.0.1/pixmatch.egg-info/top_level.txt +1 -0
- pixmatch-0.0.1/pyproject.toml +121 -0
- pixmatch-0.0.1/setup.cfg +4 -0
pixmatch-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright (c) 2018 The Python Packaging Authority
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
pixmatch-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pixmatch
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A modern VisiPics replacement.
|
|
5
|
+
Author-email: Ryan Heard <ryanwheard@gmail.com>
|
|
6
|
+
Project-URL: Repository, https://github.com/rheard/pixmatch
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
14
|
+
Requires-Python: >=3.9
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: Pillow
|
|
18
|
+
Requires-Dist: imagehash
|
|
19
|
+
Provides-Extra: gui
|
|
20
|
+
Requires-Dist: PySide6; extra == "gui"
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# PixMatch
|
|
24
|
+
|
|
25
|
+
PixMatch is a modern, cross-platform duplicate-image finder inspired by VisiPics, built with PySide6.
|
|
26
|
+
|
|
27
|
+

|
|
28
|
+
|
|
29
|
+
PixMatch scans folders (and ZIP archives) for visually similar images, groups matches,
|
|
30
|
+
and lets you quickly keep, ignore, or delete files from a clean GUI.
|
|
31
|
+
Rotated, mirrored or recompressed images are no match for PixMatch!
|
|
32
|
+
PixMatch can even detect visually similar GIFs and animated WebP files.
|
|
33
|
+
Files inside ZIPs are treated as read-only “sources of truth”
|
|
34
|
+
—never deleted—so you can safely compare against archived libraries.
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
Supported extensions: `.jpg`, `.jpeg`, `.png`, `.webp`, `.tif`, `.tiff`, `.bmp`, `.gif`, `.zip`.
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
## Install
|
|
41
|
+
|
|
42
|
+
PixMatch is a standard Python app (GUI via PySide6).
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
python -m pip install pixmatch[gui]
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Running
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
python -m pixmatch
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Usage
|
|
55
|
+
|
|
56
|
+
Simply select some folders to parse and then click begin.
|
|
57
|
+
|
|
58
|
+
Once duplicate groups begin to appear in the duplicates view,
|
|
59
|
+
you can start to select actions for them and then execute those actions.
|
|
60
|
+
Clicking on a tile will cycle through actions, with red being delete, yellow being ignore, and green being no action.
|
|
61
|
+
|
|
62
|
+
Images which are in zips and cannot be deleted will have a rar icon to denote such,
|
|
63
|
+
and they cannot be marked for deletion.
|
|
64
|
+
|
|
65
|
+
The status bar under each image shows the full path, the file size, the uncompressed file size,
|
|
66
|
+
the frames in the image if it is an animated image, the image dimensions and the last modified date.
|
|
67
|
+
|
|
68
|
+
Basic status bar example:
|
|
69
|
+
|
|
70
|
+

|
|
71
|
+
|
|
72
|
+
Animated image status bar example:
|
|
73
|
+
|
|
74
|
+

|
|
75
|
+
|
|
76
|
+
#### Notes
|
|
77
|
+
* An exact match checkbox is provided. If strength is 10 and this checkbox is checked,
|
|
78
|
+
SHA-256 file hashes will be used instead of perceptual hashes.
|
|
79
|
+
|
|
80
|
+
#### Optional Args:
|
|
81
|
+
```markdown
|
|
82
|
+
positional arguments:
|
|
83
|
+
folders Folders to load into the selected file path display (to speed up testing).
|
|
84
|
+
|
|
85
|
+
options:
|
|
86
|
+
--verbose More detailed logging
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Acknowledgements
|
|
90
|
+
|
|
91
|
+
* Thanks to anyone who supported this effort, including the teams behind PySide6, Pillow, PyPI, and many other projects.
|
|
92
|
+
* Thanks to Johannes Buchner and the team behind imagehash, which serves as a large backbone in this application and saved me a lot of time.
|
|
93
|
+
* Thanks to Guillaume Fouet (aka Ozone) for VisiPics and the inspiration. Please don't be mad, I just wanted some new features like better gif and zip support.
|
pixmatch-0.0.1/README.md
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# PixMatch
|
|
2
|
+
|
|
3
|
+
PixMatch is a modern, cross-platform duplicate-image finder inspired by VisiPics, built with PySide6.
|
|
4
|
+
|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
PixMatch scans folders (and ZIP archives) for visually similar images, groups matches,
|
|
8
|
+
and lets you quickly keep, ignore, or delete files from a clean GUI.
|
|
9
|
+
Rotated, mirrored or recompressed images are no match for PixMatch!
|
|
10
|
+
PixMatch can even detect visually similar GIFs and animated WebP files.
|
|
11
|
+
Files inside ZIPs are treated as read-only “sources of truth”
|
|
12
|
+
—never deleted—so you can safely compare against archived libraries.
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
Supported extensions: `.jpg`, `.jpeg`, `.png`, `.webp`, `.tif`, `.tiff`, `.bmp`, `.gif`, `.zip`.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
PixMatch is a standard Python app (GUI via PySide6).
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
python -m pip install pixmatch[gui]
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Running
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
python -m pixmatch
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Usage
|
|
33
|
+
|
|
34
|
+
Simply select some folders to parse and then click begin.
|
|
35
|
+
|
|
36
|
+
Once duplicate groups begin to appear in the duplicates view,
|
|
37
|
+
you can start to select actions for them and then execute those actions.
|
|
38
|
+
Clicking on a tile will cycle through actions, with red being delete, yellow being ignore, and green being no action.
|
|
39
|
+
|
|
40
|
+
Images which are in zips and cannot be deleted will have a rar icon to denote such,
|
|
41
|
+
and they cannot be marked for deletion.
|
|
42
|
+
|
|
43
|
+
The status bar under each image shows the full path, the file size, the uncompressed file size,
|
|
44
|
+
the frames in the image if it is an animated image, the image dimensions and the last modified date.
|
|
45
|
+
|
|
46
|
+
Basic status bar example:
|
|
47
|
+
|
|
48
|
+

|
|
49
|
+
|
|
50
|
+
Animated image status bar example:
|
|
51
|
+
|
|
52
|
+

|
|
53
|
+
|
|
54
|
+
#### Notes
|
|
55
|
+
* An exact match checkbox is provided. If strength is 10 and this checkbox is checked,
|
|
56
|
+
SHA-256 file hashes will be used instead of perceptual hashes.
|
|
57
|
+
|
|
58
|
+
#### Optional Args:
|
|
59
|
+
```markdown
|
|
60
|
+
positional arguments:
|
|
61
|
+
folders Folders to load into the selected file path display (to speed up testing).
|
|
62
|
+
|
|
63
|
+
options:
|
|
64
|
+
--verbose More detailed logging
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Acknowledgements
|
|
68
|
+
|
|
69
|
+
* Thanks to anyone who supported this effort, including the teams behind PySide6, Pillow, PyPI, and many other projects.
|
|
70
|
+
* Thanks to Johannes Buchner and the team behind imagehash, which serves as a large backbone in this application and saved me a lot of time.
|
|
71
|
+
* Thanks to Guillaume Fouet (aka Ozone) for VisiPics and the inspiration. Please don't be mad, I just wanted some new features like better gif and zip support.
|
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from multiprocessing import Pool, Manager
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from threading import Event
|
|
11
|
+
from typing import Union
|
|
12
|
+
from zipfile import ZipFile
|
|
13
|
+
|
|
14
|
+
import imagehash
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
from PIL import Image
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class ZipPath:
    """
    Location of an image: a file on disk, optionally a member inside an archive.

    Instances are frozen (and therefore hashable), so they can be used as dictionary keys.
    """
    # Path of the file on disk (the image itself, or the archive that contains it).
    path: str
    # Member name inside the archive; empty when `path` is the image itself.
    subpath: str

    @property
    def path_obj(self):
        """Return `path` as a `pathlib.Path`."""
        return Path(self.path)

    @property
    def is_gif(self) -> bool:
        """Return True when the image name ends in a potentially animated extension (.gif / .webp)."""
        movie_extensions = {'.gif', '.webp'}
        # The archive member name identifies the image when there is one, otherwise the file path does.
        # Compare the real suffix: slicing the last four characters can never equal the
        # five-character ".webp".
        name = self.subpath or self.path
        return Path(name).suffix.lower() in movie_extensions

    def absolute(self):
        """Return a copy whose `path` has been made absolute; `subpath` is kept unchanged."""
        return ZipPath(str(self.path_obj.absolute()), self.subpath)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _is_under(folder_abs: str, target: str | Path) -> bool:
|
|
42
|
+
"""Return True if the ZipPath's real file (zp.path) is inside folder_abs."""
|
|
43
|
+
try:
|
|
44
|
+
Path(target).absolute().relative_to(Path(folder_abs).absolute())
|
|
45
|
+
return True
|
|
46
|
+
except ValueError:
|
|
47
|
+
return False
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# (minimum strength, (hash_size, highfreq_factor)), ordered from the strictest setting downwards.
# TODO: This sucks.
_PHASH_PARAMS_BY_MIN_STRENGTH = (
    (10, (16, 4)),  # 256-bit hash, strict
    (8, (15, 4)),
    (7, (13, 4)),
    (6, (11, 4)),
    (5, (9, 4)),
    (4, (8, 4)),
    (3, (8, 3)),
    (2, (7, 3)),
)


def phash_params_for_strength(strength: int) -> tuple[int, int]:
    """
    Translate a match strength into perceptual-hash parameters.

    Args:
        strength: Requested strength; values outside 0..10 are clamped into that range.

    Returns:
        A `(hash_size, highfreq_factor)` pair.
    """
    strength = max(0, min(10, strength))
    # The first threshold that the strength reaches wins.
    for minimum, params in _PHASH_PARAMS_BY_MIN_STRENGTH:
        if strength >= minimum:
            return params

    # Strengths 0 and 1 fall through every threshold.
    return 6, 3
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _is_degenerate_hash(image_hash) -> bool:
    """
    Return True for a hash that says nothing about the picture.

    That is a hash whose bits are all 1, all 0, or all 0 except for the very first bit.
    """
    bits = np.asarray(image_hash.hash).ravel()
    return bool(bits.all() or not bits[1:].any())


def calculate_hashes(f, is_gif=False, strength=5, exact_match=False):
    """
    Calculate hashes for a given file.

    Args:
        f (IO or str or Path): Either a file path to process, or a in-memory BytesIO object ready for reading.
            A file object passed in is closed when exact hashing is done with it.
        is_gif (bool): Is this gif data? Needed if passing an in-memory BytesIO object.
        strength (int): A number between 0 and 10 on the strength of the matches.
        exact_match (bool): Use exact SHA256 hashes?
            If true, strength must be 10.
            If false, perceptual hashes will be used, even with high strength.

    Returns:
        list: The found hashes.
            With exact_match this is a single SHA-256 hex digest.
            For gif data it is the hash of the first usable frame followed by the hash of its mirror image.
            Otherwise it holds 12 perceptual hashes: the image and its three rotations, followed by the same
            four orientations of the horizontally flipped and of the vertically flipped image.
    """
    if exact_match:
        hasher = hashlib.sha256()
        block_size = 65536
        with (open(f, "rb") if isinstance(f, (str, Path)) else f) as file:
            for block in iter(lambda: file.read(block_size), b""):
                hasher.update(block)
        return [hasher.hexdigest()]

    hash_size, highfreq_factor = phash_params_for_strength(strength)

    def phash(image):
        return imagehash.phash(image, hash_size=hash_size, highfreq_factor=highfreq_factor)

    with Image.open(f) as im:
        if is_gif:
            initial_hash = phash(im)
            # imagehash produces weird hashes for some gifs, because some gifs have bad first frames
            # consisting of nothing or only a single color. While the hash is one of those,
            # advance to the next frame and hash THAT instead.
            # TODO: This is simply not good enough. I'm still getting bad matches for gifs, tho they are extremely rare
            while _is_degenerate_hash(initial_hash):
                try:
                    im.seek(im.tell() + 1)
                except EOFError:
                    # Ran out of frames; keep the last hash that was computed.
                    break
                initial_hash = phash(im)

            # For GIFs we'll look for mirrored versions but thats it
            flipped_h_image = im.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
            return [initial_hash, phash(flipped_h_image)]

        flipped_h_image = im.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
        # This used FLIP_LEFT_RIGHT a second time, which only repeated the horizontal flip.
        # NOTE(review): a vertical flip equals a horizontal flip rotated by 180 degrees, so this family
        # should repeat hashes of the previous one; it is kept so the result keeps its 12-entry layout.
        flipped_v_image = im.transpose(Image.Transpose.FLIP_TOP_BOTTOM)

        # Rotate with transpose() rather than rotate(): rotate() keeps the original canvas size unless
        # expand is set, which crops non-square images, so a rotated copy of such an image would hash
        # differently. The transposes turn the whole picture.
        rotations = (Image.Transpose.ROTATE_90, Image.Transpose.ROTATE_180, Image.Transpose.ROTATE_270)
        images = []
        for base in (im, flipped_h_image, flipped_v_image):
            images.append(base)
            images.extend(base.transpose(rotation) for rotation in rotations)

        return [phash(image) for image in images]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _process_image(path: Union[str, Path], strength=5, exact_match=False):
    """
    Hash one file from disk; this is the function executed by the worker processes.

    Args:
        path: The image or ZIP archive to hash.
        strength (int): Passed through to `calculate_hashes`.
        exact_match (bool): Passed through to `calculate_hashes`.

    Returns:
        tuple: `(path, hashes)` for an image, where `hashes` is the list from `calculate_hashes`.
            For a ZIP archive `(path, results)`, where `results` maps each hashed member name
            to its list of hashes.
    """
    movie_extensions = {".gif", ".webp"}
    path = Path(path)
    suffix = path.suffix.lower()
    if suffix != '.zip':
        return path, calculate_hashes(path, is_gif=suffix in movie_extensions,
                                      strength=strength, exact_match=exact_match)

    # Only hash members that look like images. Directory entries and unrelated files cannot be opened
    # as images, and one failure would discard the results for the whole archive.
    # Nested archives are not opened, so ".zip" members are skipped as well.
    image_extensions = ImageMatcher.SUPPORTED_EXTS - {".zip"}

    results = dict()
    with ZipFile(path) as zf:
        for member in zf.infolist():
            # Use the real suffix; the last four characters can never equal ".webp".
            member_suffix = Path(member.filename).suffix.lower()
            if member.is_dir() or member_suffix not in image_extensions:
                continue

            with zf.open(member) as zipped_file:
                results[member.filename] = calculate_hashes(zipped_file, is_gif=member_suffix in movie_extensions,
                                                            strength=strength, exact_match=exact_match)

    return path, results
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclass
class ImageMatch:
    """The images that were filed under one hash."""
    # Position of this group in `ImageMatcher.matches`, or None while it is not listed there.
    # `Union` instead of `int | None`: class annotations are evaluated when the class is created,
    # and the `|` form raises TypeError on Python 3.9.
    match_i: Union[int, None] = field(default=None)
    # Every image filed under the hash, in the order they were added.
    matches: list[ZipPath] = field(default_factory=list)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@dataclass(frozen=True)
class NewGroup:
    """Event: a hash group just received its second image and became a match group."""
    # The group that was just created (a forward reference to ImageMatch).
    group: "ImageMatch"
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@dataclass(frozen=True)
class NewMatch:
    """Event: one more image was added to a match group that already existed."""
    # The group that grew.
    group: "ImageMatch"
    # The image that was added to it.
    path: ZipPath
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@dataclass(frozen=True)
class Finished:
    """Event: the matcher ran to completion. Carries no data."""
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# Any of the event types that ImageMatcher puts on its `events` queue.
MatcherEvent = Union[NewGroup, NewMatch, Finished]
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# TODO: FINISHED signal?
|
|
175
|
+
class ImageMatcher:
    """
    Find duplicate images below a set of folders.

    `run` walks the folders, hashes every supported file in a process pool and files the results
    under their hashes. Images that end up under the same hash form a match group, listed in `matches`.
    Progress is published on the `events` queue as `NewGroup`, `NewMatch` and `Finished` objects.
    """

    SUPPORTED_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".tif", ".tiff", ".bmp", ".gif", ".zip"}

    def __init__(self, strength: int = 5, exact_match: bool = False, processes: Union[int, None] = None,
                 extensions: Union[set, None] = None):
        """
        Args:
            strength: Match strength between 0 and 10, handed to the hashing function.
            exact_match: Handed to the hashing function; selects exact file hashes.
            processes: Size of the worker pool, handed to `multiprocessing.Pool`.
            extensions: Lower-case file suffixes to consider; defaults to `SUPPORTED_EXTS`.

        Raises:
            ValueError: If `strength` is outside 0..10.
        """
        if not (0 <= strength <= 10):
            raise ValueError("Strength must be between 0 and 10!")

        self.extensions = extensions or self.SUPPORTED_EXTS

        self.strength = strength
        self.exact_match = exact_match
        self.processes = processes
        self.found_images = 0       # images queued for hashing
        self.processed_images = 0   # images whose result (or error) has been handled
        self.duplicate_images = 0   # images that are currently part of a match group

        m = Manager()
        self.events = m.Queue()
        self._new_paths = m.Queue()
        self._removed_paths = set()     # folders that must be skipped
        self._processed_paths = set()   # folders that have already been walked
        self._hashes = defaultdict(ImageMatch)  # hash -> group of images filed under it
        self._reverse_hashes = dict()           # ZipPath -> hash it was filed under

        self._not_paused = Event()
        self._not_paused.set()
        self._finished = Event()
        self._finished.set()

        self.matches = []

    def add_path(self, path: Union[str, Path]):
        """Queue a folder for scanning and lift any earlier removal of that exact folder."""
        path = str(Path(path).absolute())
        self._removed_paths.discard(path)
        self._new_paths.put(path)

    def remove_path(self, folder: Union[str, Path]) -> None:
        """
        Mark a folder to be skipped going forward, and remove already-indexed files
        that live under it. Pauses briefly if not already paused to keep state sane.
        """
        folder = str(Path(folder).absolute())
        paused = self.conditional_pause()
        try:
            self._removed_paths.add(folder)
            self._processed_paths.discard(folder)

            # Remove anything we've already seen under that folder
            # (iterate over a copy because remove() mutates structures)
            to_remove = [p for p in self._reverse_hashes if _is_under(folder, p.path)]
            for p in to_remove:
                self.remove(p)
        finally:
            # Resume even when something above failed, so the matcher is never left paused.
            self.conditional_resume(paused)

    @property
    def left_to_process(self):
        """Number of queued images whose result has not been handled yet."""
        return self.found_images - self.processed_images

    def pause(self):
        """Pause the matcher; the scan loop and the result handler wait until `resume` is called."""
        logger.debug('Performing pause')
        self._not_paused.clear()

    def conditional_pause(self):
        """Pause unless already paused. Returns whether the matcher was paused beforehand."""
        _conditional_pause = self.is_paused()
        if not _conditional_pause:
            logger.debug('Performing conditional pause')
            self.pause()

        return _conditional_pause

    def conditional_resume(self, was_paused):
        """Undo `conditional_pause`: resume only if that call did the pausing and the run is not finished."""
        if not was_paused and not self.is_finished():
            logger.debug('Performing conditional resume')
            self.resume()

    def is_paused(self):
        return not self._not_paused.is_set()

    def finish(self):
        """Flag the run as finished; the scan loop stops and results arriving later are ignored."""
        logger.debug('Performing finished')
        self._finished.set()

    def is_finished(self):
        return self._finished.is_set()

    def resume(self):
        logger.debug('Performing resume')
        self._not_paused.set()

    def running(self):
        """Truthy while not paused and either not finished or images are still waiting to be handled."""
        return not self.is_paused() and (not self.is_finished() or self.left_to_process)

    def remove(self, path):
        """
        Forget one indexed image and update the groups and counters accordingly.

        Args:
            path (ZipPath): The image to forget.

        Raises:
            KeyError: If `path` has not been indexed.
        """
        # Pause things while we remove things...
        logger.info('Removing %s from %s', path, self.__class__.__name__)
        paused = self.conditional_pause()
        try:
            hash_ = self._reverse_hashes.pop(path)
            group = self._hashes[hash_]
            group.matches.remove(path)
            if len(group.matches) == 1:
                # A lone image is no longer a match group, so take the group out of the list.
                match_i = group.match_i
                logger.debug('Unmatching match group %s', match_i)
                group.match_i = None

                del self.matches[match_i]
                self.refresh_match_indexes(match_i)
                self.duplicate_images -= 2

            elif not group.matches:
                logger.debug('Removing empty match group')
                del self._hashes[hash_]

            else:
                logger.debug('Simple removal performed')
                self.duplicate_images -= 1

            self.processed_images -= 1
            self.found_images -= 1
        finally:
            # Resume even when the path was unknown, so the matcher is never left paused.
            self.conditional_resume(paused)

    def refresh_match_indexes(self, start=0):
        """Re-number `match_i` of every group in `matches` from position `start` onwards."""
        for match_i, match in enumerate(self.matches[start:], start=start):
            match.match_i = match_i

    def _process_image_callback(self, result):
        """
        Handle one result of `_process_image`.

        Args:
            result (tuple): `(path, hashes)`. `hashes` is a list of hashes for a single image,
                or a dict mapping archive member names to such lists.
        """
        self._not_paused.wait()
        if self.is_finished():
            return

        path, hashes = result

        if any(_is_under(d, path.path if isinstance(path, ZipPath) else path) for d in self._removed_paths):
            self.found_images -= 1
            return

        if isinstance(hashes, dict):
            # run() counted the archive as a single found image, but every member is counted below as
            # a processed image. Count the members as found instead of the archive; otherwise
            # left_to_process never returns to zero and _root_stream() never ends.
            self.found_images += len(hashes) - 1
            for sub_path, sub_hashes in hashes.items():
                self._process_image_callback((ZipPath(str(path), sub_path), sub_hashes))
            return

        if not isinstance(path, ZipPath):
            path = ZipPath(str(path), "")

        if path in self._reverse_hashes:
            self.found_images -= 1
            return

        self.processed_images += 1
        for hash_ in hashes:
            if hash_ not in self._hashes:
                continue

            group = self._hashes[hash_]
            self._reverse_hashes[path] = hash_

            # This appears to be a new match!
            for match in group.matches:
                if path.absolute() == match.absolute():
                    # This appears to be a duplicate PATH...
                    logger.warning('Duplicate files entered! %s, %s', path, match)
                    return

            group.matches.append(path)
            if group.match_i is None and len(group.matches) >= 2:
                # This is a brand new match group!
                group.match_i = len(self.matches)
                self.matches.append(group)
                self.duplicate_images += 2
                self.events.put(NewGroup(group))
                logger.debug('New match group found: %s', group.matches)
            else:
                # Just another match for an existing group...
                self.duplicate_images += 1
                self.events.put(NewMatch(group, path))
                logger.debug('New match found for group #%s: %s',
                             group.match_i,
                             group.matches)

            break
        else:
            # This is a new hash, so just add it to the hashmap and move on...
            # Just use the initial orientation
            hash_ = hashes[0]
            self._reverse_hashes[path] = hash_
            self._hashes[hash_].matches.append(path)

    def _process_image_error_callback(self, e):
        """Count a failed image as handled and report the error through the module logger."""
        self.processed_images += 1
        logger.error('Failed to process an image: %s', e)

    def _root_stream(self):
        """Yield queued folders until the queue is empty and no image is waiting to be handled."""
        # Yield any paths that come up for processing, then wait until processing is finished for any new paths
        while not self._new_paths.empty() or self.left_to_process:
            if self._new_paths.empty():
                time.sleep(0.05)
                continue

            yield self._new_paths.get_nowait()

    def run(self, paths: list[Union[str, Path]]):
        """
        Scan `paths` (and any folder added through `add_path` in the meantime) for duplicates.

        Returns once every queued image has been handled, or once `finish` has been called.
        A `Finished` event is published only when the run completed on its own.
        """
        # TODO: Verify none of the paths overlap
        # TODO: Verify none of the dirs have been deleted after we started

        self._not_paused.set()
        self._finished.clear()

        for path in paths:
            self.add_path(path)

        with Pool(self.processes) as tp:
            for path in self._root_stream():
                path = Path(path)
                if not path.is_dir():
                    logger.warning('A path was entered that was not a directory : %s', path)
                    continue

                path = str(path.absolute())
                if path in self._removed_paths or path in self._processed_paths:
                    continue

                for root, _dirs, files in os.walk(path):
                    if self.is_finished():
                        break

                    root = Path(root)

                    if any(_is_under(d, root) for d in self._removed_paths):
                        continue

                    for f in files:
                        self._not_paused.wait()
                        if self.is_finished():
                            break

                        f = root / f

                        if f.suffix.lower() not in self.extensions:
                            continue

                        if any(_is_under(d, f) for d in self._removed_paths):
                            continue

                        # TODO: This sucks (for zips at least), but I can't iterate over the dict while its changing...
                        if ZipPath(str(f), "") in self._reverse_hashes:
                            continue

                        self.found_images += 1
                        tp.apply_async(
                            _process_image,
                            args=(f, ),
                            kwds={
                                'strength': self.strength,
                                'exact_match': self.exact_match,
                            },
                            callback=self._process_image_callback,
                            error_callback=self._process_image_error_callback,
                        )

                self._processed_paths.add(path)

            tp.close()

            # Only wait for outstanding work when the run was not cancelled.
            if not self.is_finished():
                tp.join()

        if not self.is_finished():
            self._finished.set()
            self.events.put(Finished())
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
import platform
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from PySide6 import QtWidgets
|
|
8
|
+
|
|
9
|
+
from pixmatch.gui import MainWindow
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main():
    """
    Parse the command line, set up logging and run the PixMatch window.

    Returns:
        The value returned by the Qt event loop, for use as the process exit code.
    """
    parser = argparse.ArgumentParser(
        description="Process zero or more file paths."
    )
    parser.add_argument(
        "folders",
        nargs="*",
        type=Path,
        help="Folders to load into the selected file path display (to speed up testing).",
    )
    parser.add_argument('--verbose', action='store_true', help="More detailed logging")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format='%(module)s::%(funcName)s::%(lineno)d %(levelname)s %(asctime)s - %(message)s',
    )

    if platform.system() == "Windows":
        # Need to tell Windows to not use the Python app icon and use the Window icon instead...
        # I'm not sure on the specifics but calling this method with any string seems to do the trick....
        # https://stackoverflow.com/questions/1551605
        import ctypes
        ctypes.windll.shell32.SetCurrentProcessExplicitAppUserModelID('company.app.1')

    app = QtWidgets.QApplication([])
    # Basic stylesheet for subtle polish without complexity.
    app.setStyleSheet(
        """
        QToolBar { spacing: 8px; }
        QLabel#GroupTitle { padding: 4px 0; }
        QFrame#ImageTile { border: 1px solid #444; border-radius: 6px; padding: 6px; }
        """
    )
    # Keep a reference to the window for as long as the event loop runs.
    w = MainWindow(args.folders)
    w.show()
    return app.exec()


if __name__ == "__main__":
    # Hand the event loop's result to the shell instead of always exiting with 0.
    raise SystemExit(main())
|