photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/app.py +6 -11
  3. orchestrator/build_pipeline.py +19 -21
  4. orchestrator/orchestrator_runner.py +11 -8
  5. orchestrator/pipeline_builder.py +126 -126
  6. orchestrator/pipeline_orchestrator.py +604 -604
  7. orchestrator/review_persistence.py +162 -162
  8. orchestrator/static/orchestrator.css +76 -76
  9. orchestrator/static/orchestrator.html +11 -5
  10. orchestrator/static/orchestrator.js +3 -1
  11. overlap_metrics/__init__.py +1 -1
  12. overlap_metrics/config.py +135 -135
  13. overlap_metrics/core.py +284 -284
  14. overlap_metrics/estimators.py +292 -292
  15. overlap_metrics/metrics.py +307 -307
  16. overlap_metrics/registry.py +99 -99
  17. overlap_metrics/utils.py +104 -104
  18. photo_compare/__init__.py +1 -1
  19. photo_compare/base.py +285 -285
  20. photo_compare/config.py +225 -225
  21. photo_compare/distance.py +15 -15
  22. photo_compare/feature_methods.py +173 -173
  23. photo_compare/file_hash.py +29 -29
  24. photo_compare/hash_methods.py +99 -99
  25. photo_compare/histogram_methods.py +118 -118
  26. photo_compare/pixel_methods.py +58 -58
  27. photo_compare/structural_methods.py +104 -104
  28. photo_compare/types.py +28 -28
  29. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
  30. photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
  31. scripts/orchestrate.py +12 -10
  32. utils/__init__.py +4 -3
  33. utils/base_pipeline_stage.py +171 -171
  34. utils/base_ports.py +176 -176
  35. utils/benchmark_utils.py +823 -823
  36. utils/channel.py +74 -74
  37. utils/comparison_gates.py +40 -21
  38. utils/compute_benchmarks.py +355 -355
  39. utils/compute_identical.py +94 -24
  40. utils/compute_indices.py +235 -235
  41. utils/compute_perceptual_hash.py +127 -127
  42. utils/compute_perceptual_match.py +240 -240
  43. utils/compute_sha_bins.py +64 -20
  44. utils/compute_template_similarity.py +1 -1
  45. utils/compute_versions.py +483 -483
  46. utils/config.py +8 -5
  47. utils/data_io.py +83 -83
  48. utils/graph_context.py +44 -44
  49. utils/logger.py +2 -2
  50. utils/models.py +2 -2
  51. utils/photo_file.py +90 -91
  52. utils/pipeline_graph.py +334 -334
  53. utils/pipeline_stage.py +408 -408
  54. utils/plot_helpers.py +123 -123
  55. utils/ports.py +136 -136
  56. utils/progress.py +415 -415
  57. utils/report_builder.py +139 -139
  58. utils/review_types.py +55 -55
  59. utils/review_utils.py +10 -19
  60. utils/sequence.py +10 -8
  61. utils/sequence_clustering.py +1 -1
  62. utils/template.py +57 -57
  63. utils/template_parsing.py +71 -0
  64. photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
  65. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
  66. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
  67. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
  68. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
photo_compare/config.py CHANGED
@@ -1,225 +1,225 @@
1
- """Self-contained configuration for photo_compare library.
2
-
3
- This module provides default configuration values that can be overridden
4
- by the parent project via configure() function. All config classes are
5
- frozen dataclasses for immutability and safety.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- from dataclasses import dataclass, field
11
- from typing import Any, cast
12
-
13
-
14
- @dataclass(frozen=True)
15
- class ThresholdConfig:
16
- """Similarity thresholds for comparison methods.
17
-
18
- All thresholds are in the range [0, 1+] where higher values mean
19
- images must be more similar to be considered duplicates. Most thresholds
20
- are based on empirical benchmark results.
21
-
22
- Hash methods (0-1 range):
23
- AHASH: Average hash threshold
24
- DHASH: Difference hash threshold (best hash method)
25
- PHASH: Perceptual hash threshold
26
- WHASH: Wavelet hash threshold
27
-
28
- Feature methods (0-2 range, match ratio):
29
- SIFT: SIFT feature matching threshold
30
- AKAZE: AKAZE feature matching threshold
31
- ORB: ORB feature matching threshold
32
- BRISK: BRISK feature matching threshold (very high)
33
-
34
- Structural methods (0-1 range):
35
- SSIM: SSIM threshold (best overall)
36
- MS_SSIM: Multi-scale SSIM threshold
37
- HOG: HOG features threshold
38
-
39
- Histogram methods (0-1 range):
40
- COLOR_HISTOGRAM: RGB histogram correlation threshold
41
- HSV_HISTOGRAM: HSV histogram correlation threshold
42
-
43
- Pixel methods (0-1 range):
44
- MSE: Mean squared error threshold
45
- PSNR: Peak signal-to-noise ratio threshold
46
- """
47
-
48
- # Hash methods
49
- AHASH: float = 0.95313
50
- DHASH: float = 0.75000
51
- PHASH: float = 0.71875
52
- WHASH: float = 0.96875
53
-
54
- # Feature methods
55
- SIFT: float = 0.61538
56
- AKAZE: float = 0.66667
57
- ORB: float = 0.55556
58
- BRISK: float = 1.52381
59
-
60
- # Structural methods
61
- SSIM: float = 0.9 # 0.56363 - this number is too small
62
- MS_SSIM: float = 0.65609
63
- HOG: float = 0.83389
64
-
65
- # Histogram methods
66
- COLOR_HISTOGRAM: float = 0.0
67
- HSV_HISTOGRAM: float = 0.0
68
-
69
- # Pixel methods
70
- MSE: float = 0.0
71
- PSNR: float = 0.0
72
-
73
-
74
- @dataclass(frozen=True)
75
- class ImageProcessingConfig:
76
- """Image processing parameters.
77
-
78
- These parameters control how images are prepared for comparison.
79
- Larger values generally mean more detail but slower processing.
80
-
81
- Attributes:
82
- BASIC_HASH_SIZE: Hash size for hash methods (8 = 64 bits)
83
- SIFT_MAX_FEATURES: Maximum SIFT keypoints to detect
84
- ORB_MAX_FEATURES: Maximum ORB keypoints to detect
85
- SSIM_SIDE_SIZE: Image resize dimension for SSIM
86
- HOG_ORIENTATIONS: Number of HOG gradient orientations
87
- HOG_PIXELS_PER_CELL: HOG cell size in pixels
88
- COLOR_HIST_BINS: Number of bins per RGB channel
89
- HSV_HIST_BINS: Number of bins for H, S, V channels
90
- """
91
-
92
- BASIC_HASH_SIZE: int = 8
93
- SIFT_MAX_FEATURES: int = 500
94
- ORB_MAX_FEATURES: int = 500
95
- SSIM_SIDE_SIZE: int = 256
96
- HOG_ORIENTATIONS: int = 9
97
- HOG_PIXELS_PER_CELL: tuple[int, int] = (8, 8)
98
- COLOR_HIST_BINS: int = 32
99
- HSV_HIST_BINS: tuple[int, int, int] = (16, 16, 16)
100
-
101
-
102
- @dataclass(frozen=True)
103
- class FeatureConfig:
104
- """Feature matching parameters.
105
-
106
- Attributes:
107
- LOWE_RATIO: Lowe's ratio test threshold (0.7 is standard)
108
- """
109
-
110
- LOWE_RATIO: float = 0.7
111
-
112
-
113
- @dataclass
114
- class PhotoCompareConfig:
115
- """Central configuration for photo_compare library.
116
-
117
- This is a mutable container for the three frozen config sections.
118
- Use configure() to create updated versions.
119
- """
120
-
121
- thresholds: ThresholdConfig = field(default_factory=ThresholdConfig)
122
- image_processing: ImageProcessingConfig = field(default_factory=ImageProcessingConfig)
123
- features: FeatureConfig = field(default_factory=FeatureConfig)
124
-
125
-
126
- # Global configuration instance
127
- _config = PhotoCompareConfig()
128
-
129
-
130
- def get_config() -> PhotoCompareConfig:
131
- """Get the current configuration.
132
-
133
- Returns:
134
- Current PhotoCompareConfig instance
135
- """
136
- return _config
137
-
138
-
139
- def configure(
140
- thresholds: dict[str, float] | None = None,
141
- image_processing: dict[str, int | tuple[int, int] | tuple[int, int, int]] | None = None,
142
- features: dict[str, float] | None = None,
143
- ) -> None:
144
- """Configure the photo_compare library by creating new frozen config sections.
145
-
146
- Since config sections are frozen dataclasses, this function creates new
147
- instances with updated values and replaces the global config.
148
-
149
- Args:
150
- thresholds: Dictionary of threshold values to override
151
- image_processing: Dictionary of image processing parameters to override
152
- features: Dictionary of feature matching parameters to override
153
-
154
- Raises:
155
- AttributeError: If attempting to set unknown config parameter
156
-
157
- Example:
158
- >>> configure(
159
- ... thresholds={'AHASH': 0.95, 'DHASH': 0.75},
160
- ... image_processing={'BASIC_HASH_SIZE': 16}
161
- ... )
162
- """
163
- global _config # noqa: PLW0603
164
- # Standard library config pattern (like logging.basicConfig)
165
-
166
- # Build new threshold config if needed
167
- new_thresholds: ThresholdConfig
168
- if thresholds:
169
- # Get current values as dict
170
- threshold_dict: dict[str, float] = _config.thresholds.__dict__.copy()
171
- # Update with new values
172
- key: str
173
- value: float
174
- for key, value in thresholds.items():
175
- if key not in threshold_dict:
176
- raise AttributeError(f"Unknown threshold parameter: {key}")
177
- threshold_dict[key] = value
178
- # Create new frozen instance
179
- new_thresholds = ThresholdConfig(**threshold_dict)
180
- else:
181
- new_thresholds = _config.thresholds
182
-
183
- # Build new image processing config if needed
184
- new_image_processing: ImageProcessingConfig
185
- if image_processing:
186
- ip_dict: dict[str, int | tuple[int, int] | tuple[int, int, int]] = _config.image_processing.__dict__.copy()
187
- ip_key: str
188
- ip_value: int | tuple[int, int] | tuple[int, int, int]
189
- for ip_key, ip_value in image_processing.items():
190
- if ip_key not in ip_dict:
191
- raise AttributeError(f"Unknown image_processing parameter: {ip_key}")
192
- ip_dict[ip_key] = ip_value
193
- # Use cast to handle union type unpacking - we know the dict is structurally correct
194
- new_image_processing = ImageProcessingConfig(**cast(Any, ip_dict))
195
- else:
196
- new_image_processing = _config.image_processing
197
-
198
- # Build new features config if needed
199
- new_features: FeatureConfig
200
- if features:
201
- feat_dict: dict[str, float] = _config.features.__dict__.copy()
202
- feat_key: str
203
- feat_value: float
204
- for feat_key, feat_value in features.items():
205
- if feat_key not in feat_dict:
206
- raise AttributeError(f"Unknown features parameter: {feat_key}")
207
- feat_dict[feat_key] = feat_value
208
- new_features = FeatureConfig(**feat_dict)
209
- else:
210
- new_features = _config.features
211
-
212
- # Replace global config
213
- _config = PhotoCompareConfig(
214
- thresholds=new_thresholds, image_processing=new_image_processing, features=new_features
215
- )
216
-
217
-
218
- def reset_config() -> None:
219
- """Reset configuration to defaults.
220
-
221
- Creates a new PhotoCompareConfig with default values.
222
- """
223
- global _config # noqa: PLW0603
224
- # Standard library config pattern (like logging.basicConfig)
225
- _config = PhotoCompareConfig()
1
+ """Self-contained configuration for photo_compare library.
2
+
3
+ This module provides default configuration values that can be overridden
4
+ by the parent project via configure() function. The config section classes
5
+ are frozen dataclasses for immutability and safety; PhotoCompareConfig is the mutable container that holds them.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import Any, cast
12
+
13
+
14
@dataclass(frozen=True)
class ThresholdConfig:
    """Default similarity thresholds, one field per comparison method.

    A higher threshold means two images must be more alike before they are
    treated as duplicates. Values start at 0 and may exceed 1 for some
    methods. Most defaults come from empirical benchmark results.

    Hash methods (0-1 range):
        AHASH: Average hash threshold
        DHASH: Difference hash threshold (best hash method)
        PHASH: Perceptual hash threshold
        WHASH: Wavelet hash threshold

    Feature methods (0-2 range, match ratio):
        SIFT: SIFT feature matching threshold
        AKAZE: AKAZE feature matching threshold
        ORB: ORB feature matching threshold
        BRISK: BRISK feature matching threshold (very high)

    Structural methods (0-1 range):
        SSIM: SSIM threshold (best overall)
        MS_SSIM: Multi-scale SSIM threshold
        HOG: HOG features threshold

    Histogram methods (0-1 range):
        COLOR_HISTOGRAM: RGB histogram correlation threshold
        HSV_HISTOGRAM: HSV histogram correlation threshold

    Pixel methods (0-1 range):
        MSE: Mean squared error threshold
        PSNR: Peak signal-to-noise ratio threshold
    """

    # -- Hash methods --
    AHASH: float = 0.95313
    DHASH: float = 0.75
    PHASH: float = 0.71875
    WHASH: float = 0.96875

    # -- Feature methods --
    SIFT: float = 0.61538
    AKAZE: float = 0.66667
    ORB: float = 0.55556
    BRISK: float = 1.52381

    # -- Structural methods --
    SSIM: float = 0.9  # the earlier candidate 0.56363 was noted as too small
    MS_SSIM: float = 0.65609
    HOG: float = 0.83389

    # -- Histogram methods --
    COLOR_HISTOGRAM: float = 0.0
    HSV_HISTOGRAM: float = 0.0

    # -- Pixel methods --
    MSE: float = 0.0
    PSNR: float = 0.0
72
+
73
+
74
@dataclass(frozen=True)
class ImageProcessingConfig:
    """Parameters governing how images are prepared before comparison.

    As a rule, larger values retain more detail at the cost of slower
    processing.

    Attributes:
        BASIC_HASH_SIZE: Hash size for hash methods (8 = 64 bits)
        SIFT_MAX_FEATURES: Maximum SIFT keypoints to detect
        ORB_MAX_FEATURES: Maximum ORB keypoints to detect
        SSIM_SIDE_SIZE: Image resize dimension for SSIM
        HOG_ORIENTATIONS: Number of HOG gradient orientations
        HOG_PIXELS_PER_CELL: HOG cell size in pixels
        COLOR_HIST_BINS: Number of bins per RGB channel
        HSV_HIST_BINS: Number of bins for H, S, V channels
    """

    # Hashing
    BASIC_HASH_SIZE: int = 8

    # Keypoint detectors
    SIFT_MAX_FEATURES: int = 500
    ORB_MAX_FEATURES: int = 500

    # Structural comparison
    SSIM_SIDE_SIZE: int = 256
    HOG_ORIENTATIONS: int = 9
    HOG_PIXELS_PER_CELL: tuple[int, int] = (8, 8)

    # Histograms
    COLOR_HIST_BINS: int = 32
    HSV_HIST_BINS: tuple[int, int, int] = (16, 16, 16)
100
+
101
+
102
@dataclass(frozen=True)
class FeatureConfig:
    """Parameters for feature matching.

    Attributes:
        LOWE_RATIO: Threshold used by Lowe's ratio test (0.7 is standard)
    """

    LOWE_RATIO: float = 0.7
111
+
112
+
113
@dataclass
class PhotoCompareConfig:
    """Top-level configuration object for the photo_compare library.

    A mutable holder for three frozen sections: thresholds, image
    processing parameters and feature matching parameters. Each section
    defaults to a freshly constructed instance. Call configure() to
    install a container with updated sections.
    """

    # Each section gets its own default instance via default_factory.
    thresholds: ThresholdConfig = field(default_factory=ThresholdConfig)
    image_processing: ImageProcessingConfig = field(default_factory=ImageProcessingConfig)
    features: FeatureConfig = field(default_factory=FeatureConfig)
124
+
125
+
126
# Module-wide configuration instance, created with all default values
_config: PhotoCompareConfig = PhotoCompareConfig()
128
+
129
+
130
def get_config() -> PhotoCompareConfig:
    """Return the configuration object currently in effect.

    Returns:
        The PhotoCompareConfig instance bound to the module-level ``_config``
    """
    current: PhotoCompareConfig = _config
    return current
137
+
138
+
139
def _apply_overrides(section: Any, overrides: dict[str, Any] | None, label: str) -> Any:
    """Return a copy of a frozen config section with ``overrides`` applied.

    Args:
        section: Dataclass instance holding the current values
        overrides: Field names mapped to replacement values; ``None`` or an
            empty dict means "no change"
        label: Section name used in the error message

    Returns:
        ``section`` itself when there is nothing to override, otherwise a new
        instance of the same class with the overridden fields

    Raises:
        AttributeError: If ``overrides`` contains a name that is not a field
            of ``section``
    """
    # Local import keeps this block independent of the file-level import list
    from dataclasses import fields, replace

    if not overrides:
        return section

    known = {spec.name for spec in fields(section)}
    for name in overrides:
        if name not in known:
            raise AttributeError(f"Unknown {label} parameter: {name}")

    # replace() goes through __init__, so it works on frozen dataclasses
    return replace(section, **overrides)


def configure(
    thresholds: dict[str, float] | None = None,
    image_processing: dict[str, int | tuple[int, int] | tuple[int, int, int]] | None = None,
    features: dict[str, float] | None = None,
) -> None:
    """Configure the photo_compare library by creating new frozen config sections.

    Config sections are frozen dataclasses, so each section that receives
    overrides is rebuilt as a new instance, and the global config is then
    rebound to a new container. Sections without overrides are carried over
    unchanged.

    Args:
        thresholds: Dictionary of threshold values to override
        image_processing: Dictionary of image processing parameters to override
        features: Dictionary of feature matching parameters to override

    Raises:
        AttributeError: If attempting to set unknown config parameter

    Example:
        >>> configure(
        ...     thresholds={'AHASH': 0.95, 'DHASH': 0.75},
        ...     image_processing={'BASIC_HASH_SIZE': 16}
        ... )
    """
    global _config  # noqa: PLW0603
    # Standard library config pattern (like logging.basicConfig)

    # Build all three sections before rebinding the global, so an unknown
    # key in any of them leaves the current configuration untouched.
    new_thresholds = cast("ThresholdConfig", _apply_overrides(_config.thresholds, thresholds, "threshold"))
    new_image_processing = cast(
        "ImageProcessingConfig",
        _apply_overrides(_config.image_processing, image_processing, "image_processing"),
    )
    new_features = cast("FeatureConfig", _apply_overrides(_config.features, features, "features"))

    # Replace global config
    _config = PhotoCompareConfig(
        thresholds=new_thresholds, image_processing=new_image_processing, features=new_features
    )
216
+
217
+
218
def reset_config() -> None:
    """Restore the default configuration.

    Rebinds the module-level config to a newly constructed
    PhotoCompareConfig, so every section returns to its default values.
    """
    global _config  # noqa: PLW0603
    # Rebinding a module global follows the stdlib config pattern (like logging.basicConfig)
    defaults = PhotoCompareConfig()
    _config = defaults
photo_compare/distance.py CHANGED
@@ -1,15 +1,15 @@
1
- """Distance and similarity utility functions."""
2
-
3
- from __future__ import annotations
4
-
5
-
6
- def hamming_distance(hash1: bytes, hash2: bytes) -> int:
7
- """Compare any hash-based method using Hamming distance."""
8
- return sum(bin(b1 ^ b2).count("1") for b1, b2 in zip(hash1, hash2, strict=False))
9
-
10
-
11
- def hamming_similarity(hash1: bytes, hash2: bytes) -> float:
12
- """Convert Hamming distance to similarity score (0-1, higher is more similar)."""
13
- max_distance = len(hash1) * 8
14
- distance = hamming_distance(hash1, hash2)
15
- return 1.0 - (distance / max_distance)
1
+ """Distance and similarity utility functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
+ def hamming_distance(hash1: bytes, hash2: bytes) -> int:
7
+ """Compare any hash-based method using Hamming distance."""
8
+ return sum(bin(b1 ^ b2).count("1") for b1, b2 in zip(hash1, hash2, strict=False))
9
+
10
+
11
def hamming_similarity(hash1: bytes, hash2: bytes) -> float:
    """Convert Hamming distance to similarity score (0-1, higher is more similar).

    The score is ``1 - distance / (len(hash1) * 8)``. Only ``hash1``'s length
    sets the denominator, so callers should pass hashes of equal length.

    Args:
        hash1: First hash; its bit length is the maximum possible distance
        hash2: Second hash

    Returns:
        Similarity in [0, 1]. An empty ``hash1`` yields 1.0, since there are
        no bits that could differ.
    """
    max_distance = len(hash1) * 8
    if max_distance == 0:
        # Guard against ZeroDivisionError: hamming_distance() is 0 here too
        return 1.0
    distance = hamming_distance(hash1, hash2)
    return 1.0 - (distance / max_distance)