photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +2 -2
- orchestrator/app.py +6 -11
- orchestrator/build_pipeline.py +19 -21
- orchestrator/orchestrator_runner.py +11 -8
- orchestrator/pipeline_builder.py +126 -126
- orchestrator/pipeline_orchestrator.py +604 -604
- orchestrator/review_persistence.py +162 -162
- orchestrator/static/orchestrator.css +76 -76
- orchestrator/static/orchestrator.html +11 -5
- orchestrator/static/orchestrator.js +3 -1
- overlap_metrics/__init__.py +1 -1
- overlap_metrics/config.py +135 -135
- overlap_metrics/core.py +284 -284
- overlap_metrics/estimators.py +292 -292
- overlap_metrics/metrics.py +307 -307
- overlap_metrics/registry.py +99 -99
- overlap_metrics/utils.py +104 -104
- photo_compare/__init__.py +1 -1
- photo_compare/base.py +285 -285
- photo_compare/config.py +225 -225
- photo_compare/distance.py +15 -15
- photo_compare/feature_methods.py +173 -173
- photo_compare/file_hash.py +29 -29
- photo_compare/hash_methods.py +99 -99
- photo_compare/histogram_methods.py +118 -118
- photo_compare/pixel_methods.py +58 -58
- photo_compare/structural_methods.py +104 -104
- photo_compare/types.py +28 -28
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
- photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
- scripts/orchestrate.py +12 -10
- utils/__init__.py +4 -3
- utils/base_pipeline_stage.py +171 -171
- utils/base_ports.py +176 -176
- utils/benchmark_utils.py +823 -823
- utils/channel.py +74 -74
- utils/comparison_gates.py +40 -21
- utils/compute_benchmarks.py +355 -355
- utils/compute_identical.py +94 -24
- utils/compute_indices.py +235 -235
- utils/compute_perceptual_hash.py +127 -127
- utils/compute_perceptual_match.py +240 -240
- utils/compute_sha_bins.py +64 -20
- utils/compute_template_similarity.py +1 -1
- utils/compute_versions.py +483 -483
- utils/config.py +8 -5
- utils/data_io.py +83 -83
- utils/graph_context.py +44 -44
- utils/logger.py +2 -2
- utils/models.py +2 -2
- utils/photo_file.py +90 -91
- utils/pipeline_graph.py +334 -334
- utils/pipeline_stage.py +408 -408
- utils/plot_helpers.py +123 -123
- utils/ports.py +136 -136
- utils/progress.py +415 -415
- utils/report_builder.py +139 -139
- utils/review_types.py +55 -55
- utils/review_utils.py +10 -19
- utils/sequence.py +10 -8
- utils/sequence_clustering.py +1 -1
- utils/template.py +57 -57
- utils/template_parsing.py +71 -0
- photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
photo_compare/feature_methods.py
CHANGED
|
@@ -1,173 +1,173 @@
|
|
|
1
|
-
"""Feature-based similarity methods with caching support."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from abc import abstractmethod
|
|
6
|
-
|
|
7
|
-
import cv2 as cv
|
|
8
|
-
import numpy as np
|
|
9
|
-
import numpy.typing as npt
|
|
10
|
-
from PIL import Image
|
|
11
|
-
|
|
12
|
-
from .base import ComparisonMethodName, SimilarityMethod
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class FeatureMethodBase(SimilarityMethod[npt.NDArray[np.float32] | npt.NDArray[np.uint8]]):
|
|
16
|
-
"""Base class for feature-based similarity methods."""
|
|
17
|
-
|
|
18
|
-
def __init__(self, method_name: ComparisonMethodName, match_threshold: float) -> None:
|
|
19
|
-
super().__init__(method_name)
|
|
20
|
-
self.match_threshold = match_threshold
|
|
21
|
-
|
|
22
|
-
@abstractmethod
|
|
23
|
-
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.float32] | npt.NDArray[np.uint8]:
|
|
24
|
-
"""Implement the actual preparation logic for feature descriptors."""
|
|
25
|
-
pass
|
|
26
|
-
|
|
27
|
-
@abstractmethod
|
|
28
|
-
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
29
|
-
"""Get the appropriate matcher for this feature type."""
|
|
30
|
-
pass
|
|
31
|
-
|
|
32
|
-
def _compare_prepared(
|
|
33
|
-
self,
|
|
34
|
-
prep1: npt.NDArray[np.float32] | npt.NDArray[np.uint8],
|
|
35
|
-
prep2: npt.NDArray[np.float32] | npt.NDArray[np.uint8],
|
|
36
|
-
) -> float:
|
|
37
|
-
"""Compare feature descriptors using matching ratio."""
|
|
38
|
-
min_features: int = min(len(prep1), len(prep2))
|
|
39
|
-
if min_features < 2:
|
|
40
|
-
return 0.0 # Not enough features to compare
|
|
41
|
-
|
|
42
|
-
matcher: cv.FlannBasedMatcher | cv.BFMatcher = self._get_matcher()
|
|
43
|
-
matches: list[tuple[cv.DMatch, ...]] = matcher.knnMatch(prep1, prep2, k=2)
|
|
44
|
-
|
|
45
|
-
good_matches: list[cv.DMatch] = []
|
|
46
|
-
for match_pair in matches:
|
|
47
|
-
if len(match_pair) == 2:
|
|
48
|
-
m, n = match_pair
|
|
49
|
-
if m.distance < self.match_threshold * n.distance:
|
|
50
|
-
good_matches.append(m)
|
|
51
|
-
|
|
52
|
-
return len(good_matches) / min_features
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class SIFTMethod(FeatureMethodBase):
|
|
56
|
-
"""SIFT (Scale-Invariant Feature Transform) keypoint method."""
|
|
57
|
-
|
|
58
|
-
LOWE_RATIO_THRESHOLD = 0.7 # Algorithmic constant
|
|
59
|
-
|
|
60
|
-
def __init__(self, max_features: int = 0, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
|
|
61
|
-
super().__init__("sift", match_threshold)
|
|
62
|
-
self.max_features = max_features
|
|
63
|
-
|
|
64
|
-
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
65
|
-
"""SIFT uses FLANN matcher for float descriptors."""
|
|
66
|
-
return cv.FlannBasedMatcher()
|
|
67
|
-
|
|
68
|
-
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.float32]:
|
|
69
|
-
"""Prepare SIFT keypoint descriptors for the image."""
|
|
70
|
-
img = Image.fromarray(pixels, mode="RGB")
|
|
71
|
-
img = img.convert("L")
|
|
72
|
-
gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
|
|
73
|
-
|
|
74
|
-
# noinspection PyUnresolvedReferences
|
|
75
|
-
sift: cv.SIFT = cv.SIFT_create(nfeatures=self.max_features)
|
|
76
|
-
_keypoints, desc = sift.detectAndCompute(gray, None)
|
|
77
|
-
|
|
78
|
-
if desc is None:
|
|
79
|
-
# Return empty array with correct shape (0 features, 128 dimensions)
|
|
80
|
-
return np.array([], dtype=np.float32).reshape(0, 128)
|
|
81
|
-
|
|
82
|
-
# SIFT always returns float32 descriptors
|
|
83
|
-
return desc.astype(np.float32)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
class AKAZEMethod(FeatureMethodBase):
|
|
87
|
-
"""AKAZE (Accelerated-KAZE) keypoint method."""
|
|
88
|
-
|
|
89
|
-
LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
|
|
90
|
-
|
|
91
|
-
def __init__(self, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
|
|
92
|
-
super().__init__("akaze", match_threshold)
|
|
93
|
-
|
|
94
|
-
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
95
|
-
"""AKAZE uses BF matcher for binary descriptors."""
|
|
96
|
-
return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
|
|
97
|
-
|
|
98
|
-
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
|
|
99
|
-
"""Prepare AKAZE keypoint descriptors for the image."""
|
|
100
|
-
img = Image.fromarray(pixels, mode="RGB")
|
|
101
|
-
img = img.convert("L")
|
|
102
|
-
gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
|
|
103
|
-
|
|
104
|
-
# noinspection PyUnresolvedReferences
|
|
105
|
-
akaze = cv.AKAZE_create()
|
|
106
|
-
_keypoints, desc = akaze.detectAndCompute(gray, None)
|
|
107
|
-
|
|
108
|
-
if desc is None:
|
|
109
|
-
# Return empty array with correct shape (0 features, 61 bytes for AKAZE)
|
|
110
|
-
return np.array([], dtype=np.uint8).reshape(0, 61)
|
|
111
|
-
|
|
112
|
-
# AKAZE returns binary (uint8) descriptors
|
|
113
|
-
return desc.astype(np.uint8)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
class ORBMethod(FeatureMethodBase):
|
|
117
|
-
"""ORB (Oriented FAST and Rotated BRIEF) keypoint method."""
|
|
118
|
-
|
|
119
|
-
LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
|
|
120
|
-
|
|
121
|
-
def __init__(self, max_features: int = 0, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
|
|
122
|
-
super().__init__("orb", match_threshold)
|
|
123
|
-
self.max_features = max_features
|
|
124
|
-
|
|
125
|
-
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
126
|
-
"""ORB uses BF matcher for binary descriptors."""
|
|
127
|
-
return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
|
|
128
|
-
|
|
129
|
-
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
|
|
130
|
-
"""Prepare ORB keypoint descriptors for the image."""
|
|
131
|
-
img = Image.fromarray(pixels, mode="RGB")
|
|
132
|
-
img = img.convert("L")
|
|
133
|
-
gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
|
|
134
|
-
|
|
135
|
-
orb = cv.ORB_create(nfeatures=self.max_features)
|
|
136
|
-
_keypoints, desc = orb.detectAndCompute(gray, None)
|
|
137
|
-
|
|
138
|
-
if desc is None:
|
|
139
|
-
# Return empty array with correct shape (0 features, 32 bytes for ORB)
|
|
140
|
-
return np.array([], dtype=np.uint8).reshape(0, 32)
|
|
141
|
-
|
|
142
|
-
# ORB returns binary (uint8) descriptors
|
|
143
|
-
return desc.astype(np.uint8)
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
class BRISKMethod(FeatureMethodBase):
|
|
147
|
-
"""BRISK (Binary Robust Invariant Scalable Keypoints) method."""
|
|
148
|
-
|
|
149
|
-
LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
|
|
150
|
-
|
|
151
|
-
def __init__(self, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
|
|
152
|
-
super().__init__("brisk", match_threshold)
|
|
153
|
-
|
|
154
|
-
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
155
|
-
"""BRISK uses BF matcher for binary descriptors."""
|
|
156
|
-
return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
|
|
157
|
-
|
|
158
|
-
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
|
|
159
|
-
"""Prepare BRISK keypoint descriptors for the image."""
|
|
160
|
-
img = Image.fromarray(pixels, mode="RGB")
|
|
161
|
-
img = img.convert("L")
|
|
162
|
-
gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
|
|
163
|
-
|
|
164
|
-
# noinspection PyUnresolvedReferences
|
|
165
|
-
brisk = cv.BRISK_create()
|
|
166
|
-
_keypoints, desc = brisk.detectAndCompute(gray, None)
|
|
167
|
-
|
|
168
|
-
if desc is None:
|
|
169
|
-
# Return empty array with correct shape (0 features, 64 bytes for BRISK)
|
|
170
|
-
return np.array([], dtype=np.uint8).reshape(0, 64)
|
|
171
|
-
|
|
172
|
-
# BRISK returns binary (uint8) descriptors
|
|
173
|
-
return desc.astype(np.uint8)
|
|
1
|
+
"""Feature-based similarity methods with caching support."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import abstractmethod
|
|
6
|
+
|
|
7
|
+
import cv2 as cv
|
|
8
|
+
import numpy as np
|
|
9
|
+
import numpy.typing as npt
|
|
10
|
+
from PIL import Image
|
|
11
|
+
|
|
12
|
+
from .base import ComparisonMethodName, SimilarityMethod
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FeatureMethodBase(SimilarityMethod[npt.NDArray[np.float32] | npt.NDArray[np.uint8]]):
|
|
16
|
+
"""Base class for feature-based similarity methods."""
|
|
17
|
+
|
|
18
|
+
def __init__(self, method_name: ComparisonMethodName, match_threshold: float) -> None:
|
|
19
|
+
super().__init__(method_name)
|
|
20
|
+
self.match_threshold = match_threshold
|
|
21
|
+
|
|
22
|
+
@abstractmethod
|
|
23
|
+
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.float32] | npt.NDArray[np.uint8]:
|
|
24
|
+
"""Implement the actual preparation logic for feature descriptors."""
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
@abstractmethod
|
|
28
|
+
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
29
|
+
"""Get the appropriate matcher for this feature type."""
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
def _compare_prepared(
|
|
33
|
+
self,
|
|
34
|
+
prep1: npt.NDArray[np.float32] | npt.NDArray[np.uint8],
|
|
35
|
+
prep2: npt.NDArray[np.float32] | npt.NDArray[np.uint8],
|
|
36
|
+
) -> float:
|
|
37
|
+
"""Compare feature descriptors using matching ratio."""
|
|
38
|
+
min_features: int = min(len(prep1), len(prep2))
|
|
39
|
+
if min_features < 2:
|
|
40
|
+
return 0.0 # Not enough features to compare
|
|
41
|
+
|
|
42
|
+
matcher: cv.FlannBasedMatcher | cv.BFMatcher = self._get_matcher()
|
|
43
|
+
matches: list[tuple[cv.DMatch, ...]] = matcher.knnMatch(prep1, prep2, k=2)
|
|
44
|
+
|
|
45
|
+
good_matches: list[cv.DMatch] = []
|
|
46
|
+
for match_pair in matches:
|
|
47
|
+
if len(match_pair) == 2:
|
|
48
|
+
m, n = match_pair
|
|
49
|
+
if m.distance < self.match_threshold * n.distance:
|
|
50
|
+
good_matches.append(m)
|
|
51
|
+
|
|
52
|
+
return len(good_matches) / min_features
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class SIFTMethod(FeatureMethodBase):
|
|
56
|
+
"""SIFT (Scale-Invariant Feature Transform) keypoint method."""
|
|
57
|
+
|
|
58
|
+
LOWE_RATIO_THRESHOLD = 0.7 # Algorithmic constant
|
|
59
|
+
|
|
60
|
+
def __init__(self, max_features: int = 0, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
|
|
61
|
+
super().__init__("sift", match_threshold)
|
|
62
|
+
self.max_features = max_features
|
|
63
|
+
|
|
64
|
+
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
65
|
+
"""SIFT uses FLANN matcher for float descriptors."""
|
|
66
|
+
return cv.FlannBasedMatcher()
|
|
67
|
+
|
|
68
|
+
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.float32]:
|
|
69
|
+
"""Prepare SIFT keypoint descriptors for the image."""
|
|
70
|
+
img = Image.fromarray(pixels, mode="RGB")
|
|
71
|
+
img = img.convert("L")
|
|
72
|
+
gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
|
|
73
|
+
|
|
74
|
+
# noinspection PyUnresolvedReferences
|
|
75
|
+
sift: cv.SIFT = cv.SIFT_create(nfeatures=self.max_features)
|
|
76
|
+
_keypoints, desc = sift.detectAndCompute(gray, None)
|
|
77
|
+
|
|
78
|
+
if desc is None:
|
|
79
|
+
# Return empty array with correct shape (0 features, 128 dimensions)
|
|
80
|
+
return np.array([], dtype=np.float32).reshape(0, 128)
|
|
81
|
+
|
|
82
|
+
# SIFT always returns float32 descriptors
|
|
83
|
+
return desc.astype(np.float32)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class AKAZEMethod(FeatureMethodBase):
|
|
87
|
+
"""AKAZE (Accelerated-KAZE) keypoint method."""
|
|
88
|
+
|
|
89
|
+
LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
|
|
90
|
+
|
|
91
|
+
def __init__(self, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
|
|
92
|
+
super().__init__("akaze", match_threshold)
|
|
93
|
+
|
|
94
|
+
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
95
|
+
"""AKAZE uses BF matcher for binary descriptors."""
|
|
96
|
+
return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
|
|
97
|
+
|
|
98
|
+
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
|
|
99
|
+
"""Prepare AKAZE keypoint descriptors for the image."""
|
|
100
|
+
img = Image.fromarray(pixels, mode="RGB")
|
|
101
|
+
img = img.convert("L")
|
|
102
|
+
gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
|
|
103
|
+
|
|
104
|
+
# noinspection PyUnresolvedReferences
|
|
105
|
+
akaze = cv.AKAZE_create()
|
|
106
|
+
_keypoints, desc = akaze.detectAndCompute(gray, None)
|
|
107
|
+
|
|
108
|
+
if desc is None:
|
|
109
|
+
# Return empty array with correct shape (0 features, 61 bytes for AKAZE)
|
|
110
|
+
return np.array([], dtype=np.uint8).reshape(0, 61)
|
|
111
|
+
|
|
112
|
+
# AKAZE returns binary (uint8) descriptors
|
|
113
|
+
return desc.astype(np.uint8)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class ORBMethod(FeatureMethodBase):
|
|
117
|
+
"""ORB (Oriented FAST and Rotated BRIEF) keypoint method."""
|
|
118
|
+
|
|
119
|
+
LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
|
|
120
|
+
|
|
121
|
+
def __init__(self, max_features: int = 0, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
|
|
122
|
+
super().__init__("orb", match_threshold)
|
|
123
|
+
self.max_features = max_features
|
|
124
|
+
|
|
125
|
+
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
126
|
+
"""ORB uses BF matcher for binary descriptors."""
|
|
127
|
+
return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
|
|
128
|
+
|
|
129
|
+
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
|
|
130
|
+
"""Prepare ORB keypoint descriptors for the image."""
|
|
131
|
+
img = Image.fromarray(pixels, mode="RGB")
|
|
132
|
+
img = img.convert("L")
|
|
133
|
+
gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
|
|
134
|
+
|
|
135
|
+
orb = cv.ORB_create(nfeatures=self.max_features)
|
|
136
|
+
_keypoints, desc = orb.detectAndCompute(gray, None)
|
|
137
|
+
|
|
138
|
+
if desc is None:
|
|
139
|
+
# Return empty array with correct shape (0 features, 32 bytes for ORB)
|
|
140
|
+
return np.array([], dtype=np.uint8).reshape(0, 32)
|
|
141
|
+
|
|
142
|
+
# ORB returns binary (uint8) descriptors
|
|
143
|
+
return desc.astype(np.uint8)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class BRISKMethod(FeatureMethodBase):
|
|
147
|
+
"""BRISK (Binary Robust Invariant Scalable Keypoints) method."""
|
|
148
|
+
|
|
149
|
+
LOWE_RATIO_THRESHOLD = 0.75 # May need slight adjustment for binary features
|
|
150
|
+
|
|
151
|
+
def __init__(self, match_threshold: float = LOWE_RATIO_THRESHOLD) -> None:
|
|
152
|
+
super().__init__("brisk", match_threshold)
|
|
153
|
+
|
|
154
|
+
def _get_matcher(self) -> cv.FlannBasedMatcher | cv.BFMatcher:
|
|
155
|
+
"""BRISK uses BF matcher for binary descriptors."""
|
|
156
|
+
return cv.BFMatcher(cv.NORM_HAMMING, crossCheck=False)
|
|
157
|
+
|
|
158
|
+
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:
|
|
159
|
+
"""Prepare BRISK keypoint descriptors for the image."""
|
|
160
|
+
img = Image.fromarray(pixels, mode="RGB")
|
|
161
|
+
img = img.convert("L")
|
|
162
|
+
gray: npt.NDArray[np.uint8] = np.array(img.resize((512, 512), Image.Resampling.LANCZOS))
|
|
163
|
+
|
|
164
|
+
# noinspection PyUnresolvedReferences
|
|
165
|
+
brisk = cv.BRISK_create()
|
|
166
|
+
_keypoints, desc = brisk.detectAndCompute(gray, None)
|
|
167
|
+
|
|
168
|
+
if desc is None:
|
|
169
|
+
# Return empty array with correct shape (0 features, 64 bytes for BRISK)
|
|
170
|
+
return np.array([], dtype=np.uint8).reshape(0, 64)
|
|
171
|
+
|
|
172
|
+
# BRISK returns binary (uint8) descriptors
|
|
173
|
+
return desc.astype(np.uint8)
|
photo_compare/file_hash.py
CHANGED
|
@@ -1,29 +1,29 @@
|
|
|
1
|
-
"""File hashing utilities for duplicate detection."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import hashlib
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def file_sha256(path: Path) -> str:
|
|
10
|
-
"""Compute SHA256 hash of file contents."""
|
|
11
|
-
# Python 3.11+ streaming helper with default buffering for efficiency
|
|
12
|
-
# Default buffering (typically 8KB) is critical for WSL/network filesystems
|
|
13
|
-
with path.open("rb") as f:
|
|
14
|
-
return hashlib.file_digest(f, "sha256").hexdigest()
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def binary_files_equal(pa: Path, pb: Path, chunk_size: int = 1 << 20) -> bool:
|
|
18
|
-
"""Return True iff files `a` and `b` are byte-for-byte identical."""
|
|
19
|
-
sa, sb = pa.stat(), pb.stat()
|
|
20
|
-
if sa.st_size != sb.st_size:
|
|
21
|
-
return False
|
|
22
|
-
with pa.open("rb") as fa, pb.open("rb") as fb:
|
|
23
|
-
while True:
|
|
24
|
-
ca = fa.read(chunk_size)
|
|
25
|
-
cb = fb.read(chunk_size)
|
|
26
|
-
if ca != cb:
|
|
27
|
-
return False
|
|
28
|
-
if not ca: # reached EOF on both
|
|
29
|
-
return True
|
|
1
|
+
"""File hashing utilities for duplicate detection."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def file_sha256(path: Path) -> str:
|
|
10
|
+
"""Compute SHA256 hash of file contents."""
|
|
11
|
+
# Python 3.11+ streaming helper with default buffering for efficiency
|
|
12
|
+
# Default buffering (typically 8KB) is critical for WSL/network filesystems
|
|
13
|
+
with path.open("rb") as f:
|
|
14
|
+
return hashlib.file_digest(f, "sha256").hexdigest()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def binary_files_equal(pa: Path, pb: Path, chunk_size: int = 1 << 20) -> bool:
|
|
18
|
+
"""Return True iff files `a` and `b` are byte-for-byte identical."""
|
|
19
|
+
sa, sb = pa.stat(), pb.stat()
|
|
20
|
+
if sa.st_size != sb.st_size:
|
|
21
|
+
return False
|
|
22
|
+
with pa.open("rb") as fa, pb.open("rb") as fb:
|
|
23
|
+
while True:
|
|
24
|
+
ca = fa.read(chunk_size)
|
|
25
|
+
cb = fb.read(chunk_size)
|
|
26
|
+
if ca != cb:
|
|
27
|
+
return False
|
|
28
|
+
if not ca: # reached EOF on both
|
|
29
|
+
return True
|
photo_compare/hash_methods.py
CHANGED
|
@@ -1,99 +1,99 @@
|
|
|
1
|
-
"""Hash-based similarity methods with caching support."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import imagehash
|
|
6
|
-
import numpy as np
|
|
7
|
-
import numpy.typing as npt
|
|
8
|
-
from PIL import Image
|
|
9
|
-
|
|
10
|
-
from .base import BinningSimilarityMethod
|
|
11
|
-
from .distance import hamming_similarity
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class AHashMethod(BinningSimilarityMethod[bytes, bytes]):
|
|
15
|
-
"""Average hash method - very fast, basic similarity detection."""
|
|
16
|
-
|
|
17
|
-
def __init__(self, hash_size: int) -> None:
|
|
18
|
-
super().__init__("ahash")
|
|
19
|
-
self.hash_size = hash_size
|
|
20
|
-
|
|
21
|
-
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
|
|
22
|
-
"""Prepare average hash for the image."""
|
|
23
|
-
img = Image.fromarray(pixels, mode="RGB")
|
|
24
|
-
hash_obj = imagehash.average_hash(img, self.hash_size)
|
|
25
|
-
return np.packbits(hash_obj.hash).tobytes()
|
|
26
|
-
|
|
27
|
-
def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
|
|
28
|
-
"""Compare hash bytes using Hamming similarity."""
|
|
29
|
-
return hamming_similarity(prep1, prep2)
|
|
30
|
-
|
|
31
|
-
def _get_bin_key(self, prepared: bytes) -> bytes:
|
|
32
|
-
"""Use the hash itself as the bin key for exact matches."""
|
|
33
|
-
return prepared
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class DHashMethod(BinningSimilarityMethod[bytes, bytes]):
|
|
37
|
-
"""Difference hash method - good for detecting crops and borders."""
|
|
38
|
-
|
|
39
|
-
def __init__(self, hash_size: int) -> None:
|
|
40
|
-
super().__init__("dhash")
|
|
41
|
-
self.hash_size = hash_size
|
|
42
|
-
|
|
43
|
-
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
|
|
44
|
-
"""Prepare difference hash for the image."""
|
|
45
|
-
img = Image.fromarray(pixels, mode="RGB")
|
|
46
|
-
hash_obj = imagehash.dhash(img, self.hash_size)
|
|
47
|
-
return np.packbits(hash_obj.hash).tobytes()
|
|
48
|
-
|
|
49
|
-
def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
|
|
50
|
-
"""Compare hash bytes using Hamming similarity."""
|
|
51
|
-
return hamming_similarity(prep1, prep2)
|
|
52
|
-
|
|
53
|
-
def _get_bin_key(self, prepared: bytes) -> bytes:
|
|
54
|
-
"""Use the hash itself as the bin key for exact matches."""
|
|
55
|
-
return prepared
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
class PHashMethod(BinningSimilarityMethod[bytes, bytes]):
|
|
59
|
-
"""Perceptual hash method - DCT based, robust to minor changes."""
|
|
60
|
-
|
|
61
|
-
def __init__(self, hash_size: int) -> None:
|
|
62
|
-
super().__init__("phash")
|
|
63
|
-
self.hash_size = hash_size
|
|
64
|
-
|
|
65
|
-
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
|
|
66
|
-
"""Prepare perceptual hash for the image."""
|
|
67
|
-
img = Image.fromarray(pixels, mode="RGB")
|
|
68
|
-
hash_obj = imagehash.phash(img, self.hash_size)
|
|
69
|
-
return np.packbits(hash_obj.hash).tobytes()
|
|
70
|
-
|
|
71
|
-
def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
|
|
72
|
-
"""Compare hash bytes using Hamming similarity."""
|
|
73
|
-
return hamming_similarity(prep1, prep2)
|
|
74
|
-
|
|
75
|
-
def _get_bin_key(self, prepared: bytes) -> bytes:
|
|
76
|
-
"""Use the hash itself as the bin key for exact matches."""
|
|
77
|
-
return prepared
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
class WHashMethod(BinningSimilarityMethod[bytes, bytes]):
|
|
81
|
-
"""Wavelet hash method - good for texture detection."""
|
|
82
|
-
|
|
83
|
-
def __init__(self, hash_size: int) -> None:
|
|
84
|
-
super().__init__("whash")
|
|
85
|
-
self.hash_size = hash_size
|
|
86
|
-
|
|
87
|
-
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
|
|
88
|
-
"""Prepare wavelet hash for the image."""
|
|
89
|
-
img = Image.fromarray(pixels, mode="RGB")
|
|
90
|
-
hash_obj = imagehash.whash(img, self.hash_size)
|
|
91
|
-
return np.packbits(hash_obj.hash).tobytes()
|
|
92
|
-
|
|
93
|
-
def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
|
|
94
|
-
"""Compare hash bytes using Hamming similarity."""
|
|
95
|
-
return hamming_similarity(prep1, prep2)
|
|
96
|
-
|
|
97
|
-
def _get_bin_key(self, prepared: bytes) -> bytes:
|
|
98
|
-
"""Use the hash itself as the bin key for exact matches."""
|
|
99
|
-
return prepared
|
|
1
|
+
"""Hash-based similarity methods with caching support."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import imagehash
|
|
6
|
+
import numpy as np
|
|
7
|
+
import numpy.typing as npt
|
|
8
|
+
from PIL import Image
|
|
9
|
+
|
|
10
|
+
from .base import BinningSimilarityMethod
|
|
11
|
+
from .distance import hamming_similarity
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AHashMethod(BinningSimilarityMethod[bytes, bytes]):
|
|
15
|
+
"""Average hash method - very fast, basic similarity detection."""
|
|
16
|
+
|
|
17
|
+
def __init__(self, hash_size: int) -> None:
|
|
18
|
+
super().__init__("ahash")
|
|
19
|
+
self.hash_size = hash_size
|
|
20
|
+
|
|
21
|
+
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
|
|
22
|
+
"""Prepare average hash for the image."""
|
|
23
|
+
img = Image.fromarray(pixels, mode="RGB")
|
|
24
|
+
hash_obj = imagehash.average_hash(img, self.hash_size)
|
|
25
|
+
return np.packbits(hash_obj.hash).tobytes()
|
|
26
|
+
|
|
27
|
+
def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
|
|
28
|
+
"""Compare hash bytes using Hamming similarity."""
|
|
29
|
+
return hamming_similarity(prep1, prep2)
|
|
30
|
+
|
|
31
|
+
def _get_bin_key(self, prepared: bytes) -> bytes:
|
|
32
|
+
"""Use the hash itself as the bin key for exact matches."""
|
|
33
|
+
return prepared
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class DHashMethod(BinningSimilarityMethod[bytes, bytes]):
|
|
37
|
+
"""Difference hash method - good for detecting crops and borders."""
|
|
38
|
+
|
|
39
|
+
def __init__(self, hash_size: int) -> None:
|
|
40
|
+
super().__init__("dhash")
|
|
41
|
+
self.hash_size = hash_size
|
|
42
|
+
|
|
43
|
+
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
|
|
44
|
+
"""Prepare difference hash for the image."""
|
|
45
|
+
img = Image.fromarray(pixels, mode="RGB")
|
|
46
|
+
hash_obj = imagehash.dhash(img, self.hash_size)
|
|
47
|
+
return np.packbits(hash_obj.hash).tobytes()
|
|
48
|
+
|
|
49
|
+
def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
|
|
50
|
+
"""Compare hash bytes using Hamming similarity."""
|
|
51
|
+
return hamming_similarity(prep1, prep2)
|
|
52
|
+
|
|
53
|
+
def _get_bin_key(self, prepared: bytes) -> bytes:
|
|
54
|
+
"""Use the hash itself as the bin key for exact matches."""
|
|
55
|
+
return prepared
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class PHashMethod(BinningSimilarityMethod[bytes, bytes]):
|
|
59
|
+
"""Perceptual hash method - DCT based, robust to minor changes."""
|
|
60
|
+
|
|
61
|
+
def __init__(self, hash_size: int) -> None:
|
|
62
|
+
super().__init__("phash")
|
|
63
|
+
self.hash_size = hash_size
|
|
64
|
+
|
|
65
|
+
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
|
|
66
|
+
"""Prepare perceptual hash for the image."""
|
|
67
|
+
img = Image.fromarray(pixels, mode="RGB")
|
|
68
|
+
hash_obj = imagehash.phash(img, self.hash_size)
|
|
69
|
+
return np.packbits(hash_obj.hash).tobytes()
|
|
70
|
+
|
|
71
|
+
def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
|
|
72
|
+
"""Compare hash bytes using Hamming similarity."""
|
|
73
|
+
return hamming_similarity(prep1, prep2)
|
|
74
|
+
|
|
75
|
+
def _get_bin_key(self, prepared: bytes) -> bytes:
|
|
76
|
+
"""Use the hash itself as the bin key for exact matches."""
|
|
77
|
+
return prepared
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class WHashMethod(BinningSimilarityMethod[bytes, bytes]):
|
|
81
|
+
"""Wavelet hash method - good for texture detection."""
|
|
82
|
+
|
|
83
|
+
def __init__(self, hash_size: int) -> None:
|
|
84
|
+
super().__init__("whash")
|
|
85
|
+
self.hash_size = hash_size
|
|
86
|
+
|
|
87
|
+
def _prepare_single(self, pixels: npt.NDArray[np.uint8]) -> bytes:
|
|
88
|
+
"""Prepare wavelet hash for the image."""
|
|
89
|
+
img = Image.fromarray(pixels, mode="RGB")
|
|
90
|
+
hash_obj = imagehash.whash(img, self.hash_size)
|
|
91
|
+
return np.packbits(hash_obj.hash).tobytes()
|
|
92
|
+
|
|
93
|
+
def _compare_prepared(self, prep1: bytes, prep2: bytes) -> float:
|
|
94
|
+
"""Compare hash bytes using Hamming similarity."""
|
|
95
|
+
return hamming_similarity(prep1, prep2)
|
|
96
|
+
|
|
97
|
+
def _get_bin_key(self, prepared: bytes) -> bytes:
|
|
98
|
+
"""Use the hash itself as the bin key for exact matches."""
|
|
99
|
+
return prepared
|