photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/app.py +6 -11
  3. orchestrator/build_pipeline.py +19 -21
  4. orchestrator/orchestrator_runner.py +11 -8
  5. orchestrator/pipeline_builder.py +126 -126
  6. orchestrator/pipeline_orchestrator.py +604 -604
  7. orchestrator/review_persistence.py +162 -162
  8. orchestrator/static/orchestrator.css +76 -76
  9. orchestrator/static/orchestrator.html +11 -5
  10. orchestrator/static/orchestrator.js +3 -1
  11. overlap_metrics/__init__.py +1 -1
  12. overlap_metrics/config.py +135 -135
  13. overlap_metrics/core.py +284 -284
  14. overlap_metrics/estimators.py +292 -292
  15. overlap_metrics/metrics.py +307 -307
  16. overlap_metrics/registry.py +99 -99
  17. overlap_metrics/utils.py +104 -104
  18. photo_compare/__init__.py +1 -1
  19. photo_compare/base.py +285 -285
  20. photo_compare/config.py +225 -225
  21. photo_compare/distance.py +15 -15
  22. photo_compare/feature_methods.py +173 -173
  23. photo_compare/file_hash.py +29 -29
  24. photo_compare/hash_methods.py +99 -99
  25. photo_compare/histogram_methods.py +118 -118
  26. photo_compare/pixel_methods.py +58 -58
  27. photo_compare/structural_methods.py +104 -104
  28. photo_compare/types.py +28 -28
  29. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
  30. photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
  31. scripts/orchestrate.py +12 -10
  32. utils/__init__.py +4 -3
  33. utils/base_pipeline_stage.py +171 -171
  34. utils/base_ports.py +176 -176
  35. utils/benchmark_utils.py +823 -823
  36. utils/channel.py +74 -74
  37. utils/comparison_gates.py +40 -21
  38. utils/compute_benchmarks.py +355 -355
  39. utils/compute_identical.py +94 -24
  40. utils/compute_indices.py +235 -235
  41. utils/compute_perceptual_hash.py +127 -127
  42. utils/compute_perceptual_match.py +240 -240
  43. utils/compute_sha_bins.py +64 -20
  44. utils/compute_template_similarity.py +1 -1
  45. utils/compute_versions.py +483 -483
  46. utils/config.py +8 -5
  47. utils/data_io.py +83 -83
  48. utils/graph_context.py +44 -44
  49. utils/logger.py +2 -2
  50. utils/models.py +2 -2
  51. utils/photo_file.py +90 -91
  52. utils/pipeline_graph.py +334 -334
  53. utils/pipeline_stage.py +408 -408
  54. utils/plot_helpers.py +123 -123
  55. utils/ports.py +136 -136
  56. utils/progress.py +415 -415
  57. utils/report_builder.py +139 -139
  58. utils/review_types.py +55 -55
  59. utils/review_utils.py +10 -19
  60. utils/sequence.py +10 -8
  61. utils/sequence_clustering.py +1 -1
  62. utils/template.py +57 -57
  63. utils/template_parsing.py +71 -0
  64. photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
  65. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
  66. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
  67. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
  68. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
@@ -1,173 +1,173 @@
1
- """Feature-based similarity methods with caching support."""
2
-
3
- from __future__ import annotations
4
-
5
- from abc import abstractmethod
6
-
7
- import cv2 as cv
8
- import numpy as np
9
- import numpy.typing as npt
10
- from PIL import Image
11
-
12
- from .base import ComparisonMethodName, SimilarityMethod
13
-
14
-
15
- class FeatureMethodBase(SimilarityMethod[npt.NDArray[np.float32] | npt.NDArray[np.uint8]]):
16
- """Base class for feature-based similarity methods."""
17
-
18
- def __init__(self, method_name: ComparisonMethodName, match_threshold: float) -> None:
19
- super().__init__(method_name)
20
- self.match_threshold = match_threshold
21
-
22
- @abstractmethod
23
- def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.float32] | npt.NDArray[np.uint8]:
24
- """Implement the actual preparation logic for feature descriptors."""
25
- pass
26
-
27
- @abstractmethod
28
- def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
29
- """Get the appropriate matcher for this feature type."""
30
- pass
31
-
32
- def _compare_prepared(
33
- self,
34
- prep1: npt.NDArray[np.float32] | npt.NDArray[np.uint8],
35
- prep2: npt.NDArray[np.float32] | npt.NDArray[np.uint8],
36
- ) -> float:
37
- """Compare feature descriptors using matching ratio."""
38
- min_features: int = min(len(prep1), len(prep2))
39
- if min_features < 2:
40
- return 0.0 # Not enough features to compare
41
-
42
- matcher: cv.FlannBasedMatcher | cv.BFMatcher = self._get_matcher()
43
- matches: list[tuple[cv.DMatch, ...]] = matcher.knnMatch(prep1, prep2, k=2)
44
-
45
- good_matches: list[cv.DMatch] = []
46
- for match_pair in matches:
47
- if len(match_pair) == 2:
48
- m, n = match_pair
49
- if m.distance < self.match_threshold * n.distance:
50
- good_matches.append(m)
51
-
52
- return len(good_matches) / min_features
53
-
54
-
55
- class SIFTMethod(FeatureMethodBase):
56
- """SIFT (Scale-Invariant Feature Transform) keypoint method."""
57
-
58
- LOWE_RATIO_THRESHOLD = 0.7 # Algorithmic constant
59
-
60
- def __init__(self, max_features: int = 0, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
61
- super().__init__("sift", match_threshold)
62
- self.max_features = max_features
63
-
64
- def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
65
- """SIFT uses FLANN matcher for float descriptors."""
66
- return cv.FlannBasedMatcher()
67
-
68
- def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.float32]:
69
- """Prepare SIFT keypoint descriptors for the image."""
70
- img = Image.fromarray(pixels, mode="RGB")
71
- img = img.convert("L")
72
- gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
73
-
74
- # noinspection PyUnresolvedReferences
75
- sift: cv.SIFT = cv.SIFT_create(nfeatures=self.max_features)
76
- _keypoints, desc = sift.detectAndCompute(gray, None)
77
-
78
- if desc is None:
79
- # Return empty array with correct shape (0 features, 128 dimensions)
80
- return np.array([], dtype=np.float32).reshape(0, 128)
81
-
82
- # SIFT always returns float32 descriptors
83
- return desc.astype(np.float32)
84
-
85
-
86
- class AKAZEMethod(FeatureMethodBase):
87
- """AKAZE (Accelerated-KAZE) keypoint method."""
88
-
89
- LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
90
-
91
- def __init__(self, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
92
- super().__init__("akaze", match_threshold)
93
-
94
- def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
95
- """AKAZE uses BF matcher for binary descriptors."""
96
- return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
97
-
98
- def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
99
- """Prepare AKAZE keypoint descriptors for the image."""
100
- img = Image.fromarray(pixels, mode="RGB")
101
- img = img.convert("L")
102
- gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
103
-
104
- # noinspection PyUnresolvedReferences
105
- akaze = cv.AKAZE_create()
106
- _keypoints, desc = akaze.detectAndCompute(gray, None)
107
-
108
- if desc is None:
109
- # Return empty array with correct shape (0 features, 61 bytes for AKAZE)
110
- return np.array([], dtype=np.uint8).reshape(0, 61)
111
-
112
- # AKAZE returns binary (uint8) descriptors
113
- return desc.astype(np.uint8)
114
-
115
-
116
- class ORBMethod(FeatureMethodBase):
117
- """ORB (Oriented FAST and Rotated BRIEF) keypoint method."""
118
-
119
- LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
120
-
121
- def __init__(self, max_features: int = 0, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
122
- super().__init__("orb", match_threshold)
123
- self.max_features = max_features
124
-
125
- def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
126
- """ORB uses BF matcher for binary descriptors."""
127
- return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
128
-
129
- def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
130
- """Prepare ORB keypoint descriptors for the image."""
131
- img = Image.fromarray(pixels, mode="RGB")
132
- img = img.convert("L")
133
- gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
134
-
135
- orb = cv.ORB_create(nfeatures=self.max_features)
136
- _keypoints, desc = orb.detectAndCompute(gray, None)
137
-
138
- if desc is None:
139
- # Return empty array with correct shape (0 features, 32 bytes for ORB)
140
- return np.array([], dtype=np.uint8).reshape(0, 32)
141
-
142
- # ORB returns binary (uint8) descriptors
143
- return desc.astype(np.uint8)
144
-
145
-
146
- class BRISKMethod(FeatureMethodBase):
147
- """BRISK (Binary Robust Invariant Scalable Keypoints) method."""
148
-
149
- LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
150
-
151
- def __init__(self, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
152
- super().__init__("brisk", match_threshold)
153
-
154
- def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
155
- """BRISK uses BF matcher for binary descriptors."""
156
- return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
157
-
158
- def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
159
- """Prepare BRISK keypoint descriptors for the image."""
160
- img = Image.fromarray(pixels, mode="RGB")
161
- img = img.convert("L")
162
- gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
163
-
164
- # noinspection PyUnresolvedReferences
165
- brisk = cv.BRISK_create()
166
- _keypoints, desc = brisk.detectAndCompute(gray, None)
167
-
168
- if desc is None:
169
- # Return empty array with correct shape (0 features, 64 bytes for BRISK)
170
- return np.array([], dtype=np.uint8).reshape(0, 64)
171
-
172
- # BRISK returns binary (uint8) descriptors
173
- return desc.astype(np.uint8)
1
+ """Feature-based similarity methods with caching support."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import abstractmethod
6
+
7
+ import cv2 as cv
8
+ import numpy as np
9
+ import numpy.typing as npt
10
+ from PIL import Image
11
+
12
+ from .base import ComparisonMethodName, SimilarityMethod
13
+
14
+
15
+ class FeatureMethodBase(SimilarityMethod[npt.NDArray[np.float32] | npt.NDArray[np.uint8]]):
16
+ """Base class for feature-based similarity methods."""
17
+
18
+ def __init__(self, method_name: ComparisonMethodName, match_threshold: float) -> None:
19
+ super().__init__(method_name)
20
+ self.match_threshold = match_threshold
21
+
22
+ @abstractmethod
23
+ def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.float32] | npt.NDArray[np.uint8]:
24
+ """Implement the actual preparation logic for feature descriptors."""
25
+ pass
26
+
27
+ @abstractmethod
28
+ def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
29
+ """Get the appropriate matcher for this feature type."""
30
+ pass
31
+
32
+ def _compare_prepared(
33
+ self,
34
+ prep1: npt.NDArray[np.float32] | npt.NDArray[np.uint8],
35
+ prep2: npt.NDArray[np.float32] | npt.NDArray[np.uint8],
36
+ ) -> float:
37
+ """Compare feature descriptors using matching ratio."""
38
+ min_features: int = min(len(prep1), len(prep2))
39
+ if min_features < 2:
40
+ return 0.0 # Not enough features to compare
41
+
42
+ matcher: cv.FlannBasedMatcher | cv.BFMatcher = self._get_matcher()
43
+ matches: list[tuple[cv.DMatch, ...]] = matcher.knnMatch(prep1, prep2, k=2)
44
+
45
+ good_matches: list[cv.DMatch] = []
46
+ for match_pair in matches:
47
+ if len(match_pair) == 2:
48
+ m, n = match_pair
49
+ if m.distance < self.match_threshold * n.distance:
50
+ good_matches.append(m)
51
+
52
+ return len(good_matches) / min_features
53
+
54
+
55
+ class SIFTMethod(FeatureMethodBase):
56
+ """SIFT (Scale-Invariant Feature Transform) keypoint method."""
57
+
58
+ LOWE_RATIO_THRESHOLD = 0.7 # Algorithmic constant
59
+
60
+ def __init__(self, max_features: int = 0, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
61
+ super().__init__("sift", match_threshold)
62
+ self.max_features = max_features
63
+
64
+ def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
65
+ """SIFT uses FLANN matcher for float descriptors."""
66
+ return cv.FlannBasedMatcher()
67
+
68
+ def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.float32]:
69
+ """Prepare SIFT keypoint descriptors for the image."""
70
+ img = Image.fromarray(pixels, mode="RGB")
71
+ img = img.convert("L")
72
+ gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
73
+
74
+ # noinspection PyUnresolvedReferences
75
+ sift: cv.SIFT = cv.SIFT_create(nfeatures=self.max_features)
76
+ _keypoints, desc = sift.detectAndCompute(gray, None)
77
+
78
+ if desc is None:
79
+ # Return empty array with correct shape (0 features, 128 dimensions)
80
+ return np.array([], dtype=np.float32).reshape(0, 128)
81
+
82
+ # SIFT always returns float32 descriptors
83
+ return desc.astype(np.float32)
84
+
85
+
86
+ class AKAZEMethod(FeatureMethodBase):
87
+ """AKAZE (Accelerated-KAZE) keypoint method."""
88
+
89
+ LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
90
+
91
+ def __init__(self, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
92
+ super().__init__("akaze", match_threshold)
93
+
94
+ def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
95
+ """AKAZE uses BF matcher for binary descriptors."""
96
+ return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
97
+
98
+ def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
99
+ """Prepare AKAZE keypoint descriptors for the image."""
100
+ img = Image.fromarray(pixels, mode="RGB")
101
+ img = img.convert("L")
102
+ gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
103
+
104
+ # noinspection PyUnresolvedReferences
105
+ akaze = cv.AKAZE_create()
106
+ _keypoints, desc = akaze.detectAndCompute(gray, None)
107
+
108
+ if desc is None:
109
+ # Return empty array with correct shape (0 features, 61 bytes for AKAZE)
110
+ return np.array([], dtype=np.uint8).reshape(0, 61)
111
+
112
+ # AKAZE returns binary (uint8) descriptors
113
+ return desc.astype(np.uint8)
114
+
115
+
116
+ class ORBMethod(FeatureMethodBase):
117
+ """ORB (Oriented FAST and Rotated BRIEF) keypoint method."""
118
+
119
+ LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
120
+
121
+ def __init__(self, max_features: int = 0, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
122
+ super().__init__("orb", match_threshold)
123
+ self.max_features = max_features
124
+
125
+ def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
126
+ """ORB uses BF matcher for binary descriptors."""
127
+ return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
128
+
129
+ def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
130
+ """Prepare ORB keypoint descriptors for the image."""
131
+ img = Image.fromarray(pixels, mode="RGB")
132
+ img = img.convert("L")
133
+ gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
134
+
135
+ orb = cv.ORB_create(nfeatures=self.max_features)
136
+ _keypoints, desc = orb.detectAndCompute(gray, None)
137
+
138
+ if desc is None:
139
+ # Return empty array with correct shape (0 features, 32 bytes for ORB)
140
+ return np.array([], dtype=np.uint8).reshape(0, 32)
141
+
142
+ # ORB returns binary (uint8) descriptors
143
+ return desc.astype(np.uint8)
144
+
145
+
146
+ class BRISKMethod(FeatureMethodBase):
147
+ """BRISK (Binary Robust Invariant Scalable Keypoints) method."""
148
+
149
+ LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
150
+
151
+ def __init__(self, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
152
+ super().__init__("brisk", match_threshold)
153
+
154
+ def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
155
+ """BRISK uses BF matcher for binary descriptors."""
156
+ return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
157
+
158
+ def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
159
+ """Prepare BRISK keypoint descriptors for the image."""
160
+ img = Image.fromarray(pixels, mode="RGB")
161
+ img = img.convert("L")
162
+ gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
163
+
164
+ # noinspection PyUnresolvedReferences
165
+ brisk = cv.BRISK_create()
166
+ _keypoints, desc = brisk.detectAndCompute(gray, None)
167
+
168
+ if desc is None:
169
+ # Return empty array with correct shape (0 features, 64 bytes for BRISK)
170
+ return np.array([], dtype=np.uint8).reshape(0, 64)
171
+
172
+ # BRISK returns binary (uint8) descriptors
173
+ return desc.astype(np.uint8)
@@ -1,29 +1,29 @@
1
- """File hashing utilities for duplicate detection."""
2
-
3
- from __future__ import annotations
4
-
5
- import hashlib
6
- from pathlib import Path
7
-
8
-
9
- def file_sha256(path: Path) -> str:
10
- """Compute SHA256 hash of file contents."""
11
- # Python 3.11+ streaming helper with default buffering for efficiency
12
- # Default buffering (typically 8KB) is critical for WSL/network filesystems
13
- with path.open("rb") as f:
14
- return hashlib.file_digest(f, "sha256").hexdigest()
15
-
16
-
17
- def binary_files_equal(pa: Path, pb: Path, chunk_size: int = 1 << 20) -> bool:
18
- """Return True iff files `a` and `b` are byte-for-byte identical."""
19
- sa, sb = pa.stat(), pb.stat()
20
- if sa.st_size != sb.st_size:
21
- return False
22
- with pa.open("rb") as fa, pb.open("rb") as fb:
23
- while True:
24
- ca = fa.read(chunk_size)
25
- cb = fb.read(chunk_size)
26
- if ca != cb:
27
- return False
28
- if not ca: # reached EOF on both
29
- return True
1
+ """File hashing utilities for duplicate detection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ from pathlib import Path
7
+
8
+
9
+ def file_sha256(path: Path) -> str:
10
+ """Compute SHA256 hash of file contents."""
11
+ # Python 3.11+ streaming helper with default buffering for efficiency
12
+ # Default buffering (typically 8KB) is critical for WSL/network filesystems
13
+ with path.open("rb") as f:
14
+ return hashlib.file_digest(f, "sha256").hexdigest()
15
+
16
+
17
+ def binary_files_equal(pa: Path, pb: Path, chunk_size: int = 1 << 20) -> bool:
18
+ """Return True iff files `a` and `b` are byte-for-byte identical."""
19
+ sa, sb = pa.stat(), pb.stat()
20
+ if sa.st_size != sb.st_size:
21
+ return False
22
+ with pa.open("rb") as fa, pb.open("rb") as fb:
23
+ while True:
24
+ ca = fa.read(chunk_size)
25
+ cb = fb.read(chunk_size)
26
+ if ca != cb:
27
+ return False
28
+ if not ca: # reached EOF on both
29
+ return True
@@ -1,99 +1,99 @@
1
- """Hash-based similarity methods with caching support."""
2
-
3
- from __future__ import annotations
4
-
5
- import imagehash
6
- import numpy as np
7
- import numpy.typing as npt
8
- from PIL import Image
9
-
10
- from .base import BinningSimilarityMethod
11
- from .distance import hamming_similarity
12
-
13
-
14
- class AHashMethod(BinningSimilarityMethod[bytes, bytes]):
15
- """Average hash method - very fast, basic similarity detection."""
16
-
17
- def __init__(self, hash_size: int) -> None:
18
- super().__init__("ahash")
19
- self.hash_size = hash_size
20
-
21
- def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
22
- """Prepare average hash for the image."""
23
- img = Image.fromarray(pixels, mode="RGB")
24
- hash_obj = imagehash.average_hash(img, self.hash_size)
25
- return np.packbits(hash_obj.hash).tobytes()
26
-
27
- def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
28
- """Compare hash bytes using Hamming similarity."""
29
- return hamming_similarity(prep1, prep2)
30
-
31
- def _get_bin_key(self, prepared: bytes) -> bytes:
32
- """Use the hash itself as the bin key for exact matches."""
33
- return prepared
34
-
35
-
36
- class DHashMethod(BinningSimilarityMethod[bytes, bytes]):
37
- """Difference hash method - good for detecting crops and borders."""
38
-
39
- def __init__(self, hash_size: int) -> None:
40
- super().__init__("dhash")
41
- self.hash_size = hash_size
42
-
43
- def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
44
- """Prepare difference hash for the image."""
45
- img = Image.fromarray(pixels, mode="RGB")
46
- hash_obj = imagehash.dhash(img, self.hash_size)
47
- return np.packbits(hash_obj.hash).tobytes()
48
-
49
- def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
50
- """Compare hash bytes using Hamming similarity."""
51
- return hamming_similarity(prep1, prep2)
52
-
53
- def _get_bin_key(self, prepared: bytes) -> bytes:
54
- """Use the hash itself as the bin key for exact matches."""
55
- return prepared
56
-
57
-
58
- class PHashMethod(BinningSimilarityMethod[bytes, bytes]):
59
- """Perceptual hash method - DCT based, robust to minor changes."""
60
-
61
- def __init__(self, hash_size: int) -> None:
62
- super().__init__("phash")
63
- self.hash_size = hash_size
64
-
65
- def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
66
- """Prepare perceptual hash for the image."""
67
- img = Image.fromarray(pixels, mode="RGB")
68
- hash_obj = imagehash.phash(img, self.hash_size)
69
- return np.packbits(hash_obj.hash).tobytes()
70
-
71
- def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
72
- """Compare hash bytes using Hamming similarity."""
73
- return hamming_similarity(prep1, prep2)
74
-
75
- def _get_bin_key(self, prepared: bytes) -> bytes:
76
- """Use the hash itself as the bin key for exact matches."""
77
- return prepared
78
-
79
-
80
- class WHashMethod(BinningSimilarityMethod[bytes, bytes]):
81
- """Wavelet hash method - good for texture detection."""
82
-
83
- def __init__(self, hash_size: int) -> None:
84
- super().__init__("whash")
85
- self.hash_size = hash_size
86
-
87
- def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
88
- """Prepare wavelet hash for the image."""
89
- img = Image.fromarray(pixels, mode="RGB")
90
- hash_obj = imagehash.whash(img, self.hash_size)
91
- return np.packbits(hash_obj.hash).tobytes()
92
-
93
- def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
94
- """Compare hash bytes using Hamming similarity."""
95
- return hamming_similarity(prep1, prep2)
96
-
97
- def _get_bin_key(self, prepared: bytes) -> bytes:
98
- """Use the hash itself as the bin key for exact matches."""
99
- return prepared
1
+ """Hash-based similarity methods with caching support."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import imagehash
6
+ import numpy as np
7
+ import numpy.typing as npt
8
+ from PIL import Image
9
+
10
+ from .base import BinningSimilarityMethod
11
+ from .distance import hamming_similarity
12
+
13
+
14
+ class AHashMethod(BinningSimilarityMethod[bytes, bytes]):
15
+ """Average hash method - very fast, basic similarity detection."""
16
+
17
+ def __init__(self, hash_size: int) -> None:
18
+ super().__init__("ahash")
19
+ self.hash_size = hash_size
20
+
21
+ def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
22
+ """Prepare average hash for the image."""
23
+ img = Image.fromarray(pixels, mode="RGB")
24
+ hash_obj = imagehash.average_hash(img, self.hash_size)
25
+ return np.packbits(hash_obj.hash).tobytes()
26
+
27
+ def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
28
+ """Compare hash bytes using Hamming similarity."""
29
+ return hamming_similarity(prep1, prep2)
30
+
31
+ def _get_bin_key(self, prepared: bytes) -> bytes:
32
+ """Use the hash itself as the bin key for exact matches."""
33
+ return prepared
34
+
35
+
36
+ class DHashMethod(BinningSimilarityMethod[bytes, bytes]):
37
+ """Difference hash method - good for detecting crops and borders."""
38
+
39
+ def __init__(self, hash_size: int) -> None:
40
+ super().__init__("dhash")
41
+ self.hash_size = hash_size
42
+
43
+ def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
44
+ """Prepare difference hash for the image."""
45
+ img = Image.fromarray(pixels, mode="RGB")
46
+ hash_obj = imagehash.dhash(img, self.hash_size)
47
+ return np.packbits(hash_obj.hash).tobytes()
48
+
49
+ def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
50
+ """Compare hash bytes using Hamming similarity."""
51
+ return hamming_similarity(prep1, prep2)
52
+
53
+ def _get_bin_key(self, prepared: bytes) -> bytes:
54
+ """Use the hash itself as the bin key for exact matches."""
55
+ return prepared
56
+
57
+
58
+ class PHashMethod(BinningSimilarityMethod[bytes, bytes]):
59
+ """Perceptual hash method - DCT based, robust to minor changes."""
60
+
61
+ def __init__(self, hash_size: int) -> None:
62
+ super().__init__("phash")
63
+ self.hash_size = hash_size
64
+
65
+ def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
66
+ """Prepare perceptual hash for the image."""
67
+ img = Image.fromarray(pixels, mode="RGB")
68
+ hash_obj = imagehash.phash(img, self.hash_size)
69
+ return np.packbits(hash_obj.hash).tobytes()
70
+
71
+ def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
72
+ """Compare hash bytes using Hamming similarity."""
73
+ return hamming_similarity(prep1, prep2)
74
+
75
+ def _get_bin_key(self, prepared: bytes) -> bytes:
76
+ """Use the hash itself as the bin key for exact matches."""
77
+ return prepared
78
+
79
+
80
+ class WHashMethod(BinningSimilarityMethod[bytes, bytes]):
81
+ """Wavelet hash method - good for texture detection."""
82
+
83
+ def __init__(self, hash_size: int) -> None:
84
+ super().__init__("whash")
85
+ self.hash_size = hash_size
86
+
87
+ def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
88
+ """Prepare wavelet hash for the image."""
89
+ img = Image.fromarray(pixels, mode="RGB")
90
+ hash_obj = imagehash.whash(img, self.hash_size)
91
+ return np.packbits(hash_obj.hash).tobytes()
92
+
93
+ def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
94
+ """Compare hash bytes using Hamming similarity."""
95
+ return hamming_similarity(prep1, prep2)
96
+
97
+ def _get_bin_key(self, prepared: bytes) -> bytes:
98
+ """Use the hash itself as the bin key for exact matches."""
99
+ return prepared