photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (68)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/app.py +6 -11
  3. orchestrator/build_pipeline.py +19 -21
  4. orchestrator/orchestrator_runner.py +11 -8
  5. orchestrator/pipeline_builder.py +126 -126
  6. orchestrator/pipeline_orchestrator.py +604 -604
  7. orchestrator/review_persistence.py +162 -162
  8. orchestrator/static/orchestrator.css +76 -76
  9. orchestrator/static/orchestrator.html +11 -5
  10. orchestrator/static/orchestrator.js +3 -1
  11. overlap_metrics/__init__.py +1 -1
  12. overlap_metrics/config.py +135 -135
  13. overlap_metrics/core.py +284 -284
  14. overlap_metrics/estimators.py +292 -292
  15. overlap_metrics/metrics.py +307 -307
  16. overlap_metrics/registry.py +99 -99
  17. overlap_metrics/utils.py +104 -104
  18. photo_compare/__init__.py +1 -1
  19. photo_compare/base.py +285 -285
  20. photo_compare/config.py +225 -225
  21. photo_compare/distance.py +15 -15
  22. photo_compare/feature_methods.py +173 -173
  23. photo_compare/file_hash.py +29 -29
  24. photo_compare/hash_methods.py +99 -99
  25. photo_compare/histogram_methods.py +118 -118
  26. photo_compare/pixel_methods.py +58 -58
  27. photo_compare/structural_methods.py +104 -104
  28. photo_compare/types.py +28 -28
  29. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
  30. photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
  31. scripts/orchestrate.py +12 -10
  32. utils/__init__.py +4 -3
  33. utils/base_pipeline_stage.py +171 -171
  34. utils/base_ports.py +176 -176
  35. utils/benchmark_utils.py +823 -823
  36. utils/channel.py +74 -74
  37. utils/comparison_gates.py +40 -21
  38. utils/compute_benchmarks.py +355 -355
  39. utils/compute_identical.py +94 -24
  40. utils/compute_indices.py +235 -235
  41. utils/compute_perceptual_hash.py +127 -127
  42. utils/compute_perceptual_match.py +240 -240
  43. utils/compute_sha_bins.py +64 -20
  44. utils/compute_template_similarity.py +1 -1
  45. utils/compute_versions.py +483 -483
  46. utils/config.py +8 -5
  47. utils/data_io.py +83 -83
  48. utils/graph_context.py +44 -44
  49. utils/logger.py +2 -2
  50. utils/models.py +2 -2
  51. utils/photo_file.py +90 -91
  52. utils/pipeline_graph.py +334 -334
  53. utils/pipeline_stage.py +408 -408
  54. utils/plot_helpers.py +123 -123
  55. utils/ports.py +136 -136
  56. utils/progress.py +415 -415
  57. utils/report_builder.py +139 -139
  58. utils/review_types.py +55 -55
  59. utils/review_utils.py +10 -19
  60. utils/sequence.py +10 -8
  61. utils/sequence_clustering.py +1 -1
  62. utils/template.py +57 -57
  63. utils/template_parsing.py +71 -0
  64. photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
  65. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
  66. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
  67. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
  68. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
utils/compute_perceptual_hash.py
@@ -1,127 +1,127 @@
-"""Implementation of perceptual hashing pipeline stage."""
-
-from __future__ import annotations
-
-from collections import defaultdict
-
-import numpy as np
-
-from photo_compare import create_comparison_method
-
-from .config import CONFIG
-from .photo_file import load_normalized_pixels
-from .pipeline_stage import PipelineStage, PrepareResult, WorkerResult
-from .ports import InputPort, OutputPort
-from .sequence import (
-    INDEX_T,
-    PhotoSequence,
-)
-
-
-def make_defaultdict() -> dict[int, list[INDEX_T]]:
-    """Inner helper function needs to be named as lambda : defaultdict(list) does not pickle."""
-    return defaultdict(list)
-
-
-class ComputePerceptualHash(
-    PipelineStage[
-        tuple[int, INDEX_T, str],  # S: work item
-        tuple[int, INDEX_T, bytes],  # T: work data
-        dict[bytes, dict[int, list[INDEX_T]]],  # R: accumulator
-    ]
-):
-    def __init__(self) -> None:
-        """Initialize the perceptual matching stage."""
-        super().__init__(
-            path=CONFIG.paths.perceptual_hash_bins_pkl,
-            stage_name="Perceptual Hash Calculation",
-        )
-
-        # Store worker argument
-        self.args = self.stage_name
-
-        # Create input port for forest (from ComputeIndices)
-        self.forest_i: InputPort[list[PhotoSequence]] = InputPort("forest")
-
-        # Create output port for perceptual hash bins
-        self.perceptual_bins_o: OutputPort[dict[bytes, dict[int, list[INDEX_T]]]] = OutputPort(
-            self, getter=lambda: self.result
-        )
-
-    def prepare(
-        self,
-    ) -> PrepareResult[tuple[int, INDEX_T, str], dict[bytes, dict[int, list[INDEX_T]]]]:
-        """Prepare perceptual hash work items by reading forest from input port.
-
-        Returns:
-            Tuple of (work_items, result_accumulator)
-        """
-        # Read forest from input port
-        forest = self.forest_i.read()
-        # Get reference counts from upstream for UI statistics tracking
-        self.ref_photos_init = self.forest_i.get_ref_photo_count()
-        self.ref_seqs_init = self.forest_i.get_ref_sequence_count()
-        # Count total photos for internal invariant checking (should never change)
-        self.total_photos = sum(seq.n_photos for seq in forest)
-
-        # Create work items from all photos in all sequences
-        work: list[tuple[int, INDEX_T, str]] = [
-            (seq_idx, idx, str(photo.path))
-            for seq_idx, seq in enumerate(forest)
-            for idx, photo in seq.get_reference().items()
-        ]
-
-        # Initialize result accumulator
-        result: dict[bytes, dict[int, list[INDEX_T]]] = defaultdict(make_defaultdict)
-
-        # ASSERTION: Verify work items created for all reference photos
-        assert len(work) == sum(len(seq.get_reference()) for seq in forest), (
-            f"Work item count mismatch: have {len(work)} work items"
-        )
-
-        return work, result
-
-    @classmethod
-    def stage_worker(cls, job: tuple[int, INDEX_T, str], _args: str) -> WorkerResult[tuple[int, INDEX_T, bytes]]:
-        """Calculate perceptual hash for a photo.
-
-        Normalizes to landscape orientation before calculating hash to ensure
-        consistent hash values regardless of portrait/landscape orientation.
-        """
-        seq_idx, idx, path = job
-        cmp = create_comparison_method(CONFIG.sequences.PERCEPTUAL_METHOD)
-
-        # Load with EXIF normalization
-        pixels = load_normalized_pixels(path)
-
-        # Additional normalization: rotate portrait to landscape for consistent phash
-        # Portrait photos (width < height) are rotated 90° CCW to landscape
-        if pixels.shape[1] < pixels.shape[0]:  # width < height
-            pixels = np.rot90(pixels, k=1)  # Rotate 90° CCW
-
-        return [], [], (seq_idx, idx, cmp.prepare(pixels))
-
-    def accumulate_results(
-        self,
-        accum: dict[bytes, dict[int, list[INDEX_T]]],
-        job: tuple[int, INDEX_T, bytes],
-    ) -> None:
-        seq, idx, key = job
-        accum[key][seq].append(idx)
-
-    def finalise(self) -> None:
-        # Count reference photos across all hash bins
-        # The result dict maps hash values -> sequence indices -> photo index lists
-        # We sum the length of all photo index lists across all bins
-        self.ref_photos_final = sum(len(indices) for bin_dict in self.result.values() for indices in bin_dict.values())
-        # Sequence count remains unchanged (this stage just bins existing sequences by hash)
-        self.ref_seqs_final = self.ref_seqs_init
-
-        # Invariant: reference photo count should match (this stage doesn't change photos)
-        assert self.ref_photos_final == self.ref_photos_init, (
-            f"ComputePerceptualHash: reference photo count mismatch - "
-            f"started with {self.ref_photos_init}, ended with {self.ref_photos_final}"
-        )
-
-    # Typed result field - perceptual hash bins
-    result: dict[bytes, dict[int, list[INDEX_T]]]
+"""Implementation of perceptual hashing pipeline stage."""
+
+from __future__ import annotations
+
+from collections import defaultdict
+
+import numpy as np
+
+from photo_compare import create_comparison_method
+
+from .config import CONFIG
+from .photo_file import load_normalized_pixels
+from .pipeline_stage import PipelineStage, PrepareResult, WorkerResult
+from .ports import InputPort, OutputPort
+from .sequence import (
+    INDEX_T,
+    PhotoSequence,
+)
+
+
+def make_defaultdict() -> dict[int, list[INDEX_T]]:
+    """Inner helper function needs to be named as lambda : defaultdict(list) does not pickle."""
+    return defaultdict(list)
+
+
+class ComputePerceptualHash(
+    PipelineStage[
+        tuple[int, INDEX_T, str],  # S: work item
+        tuple[int, INDEX_T, bytes],  # T: work data
+        dict[bytes, dict[int, list[INDEX_T]]],  # R: accumulator
+    ]
+):
+    def __init__(self) -> None:
+        """Initialize the perceptual matching stage."""
+        super().__init__(
+            path=CONFIG.paths.perceptual_hash_bins_pkl,
+            stage_name="Perceptual Hash Calculation",
+        )
+
+        # Store worker argument
+        self.args = self.stage_name
+
+        # Create input port for forest (from ComputeIndices)
+        self.forest_i: InputPort[list[PhotoSequence]] = InputPort("forest")
+
+        # Create output port for perceptual hash bins
+        self.perceptual_bins_o: OutputPort[dict[bytes, dict[int, list[INDEX_T]]]] = OutputPort(
+            self, getter=lambda: self.result
+        )
+
+    def prepare(
+        self,
+    ) -> PrepareResult[tuple[int, INDEX_T, str], dict[bytes, dict[int, list[INDEX_T]]]]:
+        """Prepare perceptual hash work items by reading forest from input port.
+
+        Returns:
+            Tuple of (work_items, result_accumulator)
+        """
+        # Read forest from input port
+        forest = self.forest_i.read()
+        # Get reference counts from upstream for UI statistics tracking
+        self.ref_photos_init = self.forest_i.get_ref_photo_count()
+        self.ref_seqs_init = self.forest_i.get_ref_sequence_count()
+        # Count total photos for internal invariant checking (should never change)
+        self.total_photos = sum(seq.n_photos for seq in forest)
+
+        # Create work items from all photos in all sequences
+        work: list[tuple[int, INDEX_T, str]] = [
+            (seq_idx, idx, str(photo.path))
+            for seq_idx, seq in enumerate(forest)
+            for idx, photo in seq.get_reference().items()
+        ]
+
+        # Initialize result accumulator
+        result: dict[bytes, dict[int, list[INDEX_T]]] = defaultdict(make_defaultdict)
+
+        # ASSERTION: Verify work items created for all reference photos
+        assert len(work) == sum(len(seq.get_reference()) for seq in forest), (
+            f"Work item count mismatch: have {len(work)} work items"
+        )
+
+        return work, result
+
+    @classmethod
+    def stage_worker(cls, job: tuple[int, INDEX_T, str], _args: str) -> WorkerResult[tuple[int, INDEX_T, bytes]]:
+        """Calculate perceptual hash for a photo.
+
+        Normalizes to landscape orientation before calculating hash to ensure
+        consistent hash values regardless of portrait/landscape orientation.
+        """
+        seq_idx, idx, path = job
+        cmp = create_comparison_method(CONFIG.sequences.PERCEPTUAL_METHOD)
+
+        # Load with EXIF normalization
+        pixels = load_normalized_pixels(path)
+
+        # Additional normalization: rotate portrait to landscape for consistent phash
+        # Portrait photos (width < height) are rotated 90° CCW to landscape
+        if pixels.shape[1] < pixels.shape[0]:  # width < height
+            pixels = np.rot90(pixels, k=1)  # Rotate 90° CCW
+
+        return [], [], (seq_idx, idx, cmp.prepare(pixels))
+
+    def accumulate_results(
+        self,
+        accum: dict[bytes, dict[int, list[INDEX_T]]],
+        job: tuple[int, INDEX_T, bytes],
+    ) -> None:
+        seq, idx, key = job
+        accum[key][seq].append(idx)
+
+    def finalise(self) -> None:
+        # Count reference photos across all hash bins
+        # The result dict maps hash values -> sequence indices -> photo index lists
+        # We sum the length of all photo index lists across all bins
+        self.ref_photos_final = sum(len(indices) for bin_dict in self.result.values() for indices in bin_dict.values())
+        # Sequence count remains unchanged (this stage just bins existing sequences by hash)
+        self.ref_seqs_final = self.ref_seqs_init
+
+        # Invariant: reference photo count should match (this stage doesn't change photos)
+        assert self.ref_photos_final == self.ref_photos_init, (
+            f"ComputePerceptualHash: reference photo count mismatch - "
+            f"started with {self.ref_photos_init}, ended with {self.ref_photos_final}"
+        )
+
+    # Typed result field - perceptual hash bins
+    result: dict[bytes, dict[int, list[INDEX_T]]]