photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
- orchestrator/__init__.py +2 -2
- orchestrator/app.py +6 -11
- orchestrator/build_pipeline.py +19 -21
- orchestrator/orchestrator_runner.py +11 -8
- orchestrator/pipeline_builder.py +126 -126
- orchestrator/pipeline_orchestrator.py +604 -604
- orchestrator/review_persistence.py +162 -162
- orchestrator/static/orchestrator.css +76 -76
- orchestrator/static/orchestrator.html +11 -5
- orchestrator/static/orchestrator.js +3 -1
- overlap_metrics/__init__.py +1 -1
- overlap_metrics/config.py +135 -135
- overlap_metrics/core.py +284 -284
- overlap_metrics/estimators.py +292 -292
- overlap_metrics/metrics.py +307 -307
- overlap_metrics/registry.py +99 -99
- overlap_metrics/utils.py +104 -104
- photo_compare/__init__.py +1 -1
- photo_compare/base.py +285 -285
- photo_compare/config.py +225 -225
- photo_compare/distance.py +15 -15
- photo_compare/feature_methods.py +173 -173
- photo_compare/file_hash.py +29 -29
- photo_compare/hash_methods.py +99 -99
- photo_compare/histogram_methods.py +118 -118
- photo_compare/pixel_methods.py +58 -58
- photo_compare/structural_methods.py +104 -104
- photo_compare/types.py +28 -28
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
- photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
- scripts/orchestrate.py +12 -10
- utils/__init__.py +4 -3
- utils/base_pipeline_stage.py +171 -171
- utils/base_ports.py +176 -176
- utils/benchmark_utils.py +823 -823
- utils/channel.py +74 -74
- utils/comparison_gates.py +40 -21
- utils/compute_benchmarks.py +355 -355
- utils/compute_identical.py +94 -24
- utils/compute_indices.py +235 -235
- utils/compute_perceptual_hash.py +127 -127
- utils/compute_perceptual_match.py +240 -240
- utils/compute_sha_bins.py +64 -20
- utils/compute_template_similarity.py +1 -1
- utils/compute_versions.py +483 -483
- utils/config.py +8 -5
- utils/data_io.py +83 -83
- utils/graph_context.py +44 -44
- utils/logger.py +2 -2
- utils/models.py +2 -2
- utils/photo_file.py +90 -91
- utils/pipeline_graph.py +334 -334
- utils/pipeline_stage.py +408 -408
- utils/plot_helpers.py +123 -123
- utils/ports.py +136 -136
- utils/progress.py +415 -415
- utils/report_builder.py +139 -139
- utils/review_types.py +55 -55
- utils/review_utils.py +10 -19
- utils/sequence.py +10 -8
- utils/sequence_clustering.py +1 -1
- utils/template.py +57 -57
- utils/template_parsing.py +71 -0
- photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
utils/compute_perceptual_hash.py
CHANGED
@@ -1,127 +1,127 @@
```python
"""Implementation of perceptual hashing pipeline stage."""

from __future__ import annotations

from collections import defaultdict

import numpy as np

from photo_compare import create_comparison_method

from .config import CONFIG
from .photo_file import load_normalized_pixels
from .pipeline_stage import PipelineStage, PrepareResult, WorkerResult
from .ports import InputPort, OutputPort
from .sequence import (
    INDEX_T,
    PhotoSequence,
)


def make_defaultdict() -> dict[int, list[INDEX_T]]:
    """Inner helper function needs to be named as lambda : defaultdict(list) does not pickle."""
    return defaultdict(list)


class ComputePerceptualHash(
    PipelineStage[
        tuple[int, INDEX_T, str],  # S: work item
        tuple[int, INDEX_T, bytes],  # T: work data
        dict[bytes, dict[int, list[INDEX_T]]],  # R: accumulator
    ]
):
    def __init__(self) -> None:
        """Initialize the perceptual matching stage."""
        super().__init__(
            path=CONFIG.paths.perceptual_hash_bins_pkl,
            stage_name="Perceptual Hash Calculation",
        )

        # Store worker argument
        self.args = self.stage_name

        # Create input port for forest (from ComputeIndices)
        self.forest_i: InputPort[list[PhotoSequence]] = InputPort("forest")

        # Create output port for perceptual hash bins
        self.perceptual_bins_o: OutputPort[dict[bytes, dict[int, list[INDEX_T]]]] = OutputPort(
            self, getter=lambda: self.result
        )

    def prepare(
        self,
    ) -> PrepareResult[tuple[int, INDEX_T, str], dict[bytes, dict[int, list[INDEX_T]]]]:
        """Prepare perceptual hash work items by reading forest from input port.

        Returns:
            Tuple of (work_items, result_accumulator)
        """
        # Read forest from input port
        forest = self.forest_i.read()
        # Get reference counts from upstream for UI statistics tracking
        self.ref_photos_init = self.forest_i.get_ref_photo_count()
        self.ref_seqs_init = self.forest_i.get_ref_sequence_count()
        # Count total photos for internal invariant checking (should never change)
        self.total_photos = sum(seq.n_photos for seq in forest)

        # Create work items from all photos in all sequences
        work: list[tuple[int, INDEX_T, str]] = [
            (seq_idx, idx, str(photo.path))
            for seq_idx, seq in enumerate(forest)
            for idx, photo in seq.get_reference().items()
        ]

        # Initialize result accumulator
        result: dict[bytes, dict[int, list[INDEX_T]]] = defaultdict(make_defaultdict)

        # ASSERTION: Verify work items created for all reference photos
        assert len(work) == sum(len(seq.get_reference()) for seq in forest), (
            f"Work item count mismatch: have {len(work)} work items"
        )

        return work, result

    @classmethod
    def stage_worker(cls, job: tuple[int, INDEX_T, str], _args: str) -> WorkerResult[tuple[int, INDEX_T, bytes]]:
        """Calculate perceptual hash for a photo.

        Normalizes to landscape orientation before calculating hash to ensure
        consistent hash values regardless of portrait/landscape orientation.
        """
        seq_idx, idx, path = job
        cmp = create_comparison_method(CONFIG.sequences.PERCEPTUAL_METHOD)

        # Load with EXIF normalization
        pixels = load_normalized_pixels(path)

        # Additional normalization: rotate portrait to landscape for consistent phash
        # Portrait photos (width < height) are rotated 90° CCW to landscape
        if pixels.shape[1] < pixels.shape[0]:  # width < height
            pixels = np.rot90(pixels, k=1)  # Rotate 90° CCW

        return [], [], (seq_idx, idx, cmp.prepare(pixels))

    def accumulate_results(
        self,
        accum: dict[bytes, dict[int, list[INDEX_T]]],
        job: tuple[int, INDEX_T, bytes],
    ) -> None:
        seq, idx, key = job
        accum[key][seq].append(idx)

    def finalise(self) -> None:
        # Count reference photos across all hash bins
        # The result dict maps hash values -> sequence indices -> photo index lists
        # We sum the length of all photo index lists across all bins
        self.ref_photos_final = sum(len(indices) for bin_dict in self.result.values() for indices in bin_dict.values())
        # Sequence count remains unchanged (this stage just bins existing sequences by hash)
        self.ref_seqs_final = self.ref_seqs_init

        # Invariant: reference photo count should match (this stage doesn't change photos)
        assert self.ref_photos_final == self.ref_photos_init, (
            f"ComputePerceptualHash: reference photo count mismatch - "
            f"started with {self.ref_photos_init}, ended with {self.ref_photos_final}"
        )

    # Typed result field - perceptual hash bins
    result: dict[bytes, dict[int, list[INDEX_T]]]
```
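For readers following the type signatures above, the short sketch below illustrates the bin structure this stage accumulates (`dict[bytes, dict[int, list[INDEX_T]]]`: perceptual hash → sequence index → photo indices) and the grouping performed by `accumulate_results`. The `worker_results` values and the candidate-filtering step at the end are illustrative assumptions, not code from the package, and `INDEX_T` is stood in by `int`.

```python
from collections import defaultdict

INDEX_T = int  # stand-in for the package's INDEX_T alias (assumed integer index)


def make_defaultdict() -> dict[int, list[INDEX_T]]:
    """Named factory, mirroring the module above: a lambda would not pickle."""
    return defaultdict(list)


# Accumulator shape used by ComputePerceptualHash:
#   perceptual hash (bytes) -> sequence index -> list of photo indices
bins: dict[bytes, dict[int, list[INDEX_T]]] = defaultdict(make_defaultdict)

# Hypothetical worker outputs: (sequence index, photo index, hash bytes)
worker_results = [
    (0, 3, b"\xa1\xb2"),
    (1, 7, b"\xa1\xb2"),  # same hash as the photo above -> lands in the same bin
    (2, 1, b"\xff\x00"),
]

# Mirrors accumulate_results(): bin each photo under its hash, grouped by sequence
for seq, idx, key in worker_results:
    bins[key][seq].append(idx)

# One plausible downstream use (an assumption, not taken from the package):
# bins holding photos from more than one sequence are candidate cross-sequence matches.
candidates = {key: dict(by_seq) for key, by_seq in bins.items() if len(by_seq) > 1}
print(candidates)  # -> {b'\xa1\xb2': {0: [3], 1: [7]}}
```

The named `make_defaultdict` factory follows the module's own workaround: the accumulator must be picklable for multiprocessing, and a `lambda: defaultdict(list)` default factory is not.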