photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/app.py +6 -11
  3. orchestrator/build_pipeline.py +19 -21
  4. orchestrator/orchestrator_runner.py +11 -8
  5. orchestrator/pipeline_builder.py +126 -126
  6. orchestrator/pipeline_orchestrator.py +604 -604
  7. orchestrator/review_persistence.py +162 -162
  8. orchestrator/static/orchestrator.css +76 -76
  9. orchestrator/static/orchestrator.html +11 -5
  10. orchestrator/static/orchestrator.js +3 -1
  11. overlap_metrics/__init__.py +1 -1
  12. overlap_metrics/config.py +135 -135
  13. overlap_metrics/core.py +284 -284
  14. overlap_metrics/estimators.py +292 -292
  15. overlap_metrics/metrics.py +307 -307
  16. overlap_metrics/registry.py +99 -99
  17. overlap_metrics/utils.py +104 -104
  18. photo_compare/__init__.py +1 -1
  19. photo_compare/base.py +285 -285
  20. photo_compare/config.py +225 -225
  21. photo_compare/distance.py +15 -15
  22. photo_compare/feature_methods.py +173 -173
  23. photo_compare/file_hash.py +29 -29
  24. photo_compare/hash_methods.py +99 -99
  25. photo_compare/histogram_methods.py +118 -118
  26. photo_compare/pixel_methods.py +58 -58
  27. photo_compare/structural_methods.py +104 -104
  28. photo_compare/types.py +28 -28
  29. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
  30. photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
  31. scripts/orchestrate.py +12 -10
  32. utils/__init__.py +4 -3
  33. utils/base_pipeline_stage.py +171 -171
  34. utils/base_ports.py +176 -176
  35. utils/benchmark_utils.py +823 -823
  36. utils/channel.py +74 -74
  37. utils/comparison_gates.py +40 -21
  38. utils/compute_benchmarks.py +355 -355
  39. utils/compute_identical.py +94 -24
  40. utils/compute_indices.py +235 -235
  41. utils/compute_perceptual_hash.py +127 -127
  42. utils/compute_perceptual_match.py +240 -240
  43. utils/compute_sha_bins.py +64 -20
  44. utils/compute_template_similarity.py +1 -1
  45. utils/compute_versions.py +483 -483
  46. utils/config.py +8 -5
  47. utils/data_io.py +83 -83
  48. utils/graph_context.py +44 -44
  49. utils/logger.py +2 -2
  50. utils/models.py +2 -2
  51. utils/photo_file.py +90 -91
  52. utils/pipeline_graph.py +334 -334
  53. utils/pipeline_stage.py +408 -408
  54. utils/plot_helpers.py +123 -123
  55. utils/ports.py +136 -136
  56. utils/progress.py +415 -415
  57. utils/report_builder.py +139 -139
  58. utils/review_types.py +55 -55
  59. utils/review_utils.py +10 -19
  60. utils/sequence.py +10 -8
  61. utils/sequence_clustering.py +1 -1
  62. utils/template.py +57 -57
  63. utils/template_parsing.py +71 -0
  64. photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
  65. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
  66. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
  67. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
  68. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
utils/plot_helpers.py CHANGED
@@ -1,123 +1,123 @@
1
- """Plotting utilities for benchmark analysis and visualization.
2
-
3
- This module provides standardized plotting functions that encapsulate
4
- matplotlib configuration and boilerplate, ensuring consistent visualization
5
- across the codebase.
6
-
7
- All functions save plots directly to files rather than displaying them,
8
- making them suitable for automated analysis pipelines.
9
- """
10
-
11
- from pathlib import Path
12
-
13
- import matplotlib.pyplot as plt
14
- import numpy as np
15
- import pandas as pd
16
- from numpy.typing import NDArray
17
-
18
-
19
- def save_histogram_comparison(
20
- pos_data: NDArray[np.float64],
21
- neg_data: NDArray[np.float64],
22
- threshold: float,
23
- method_name: str,
24
- output_path: Path,
25
- ) -> None:
26
- """Generate and save a histogram comparing similar/dissimilar distributions.
27
-
28
- Creates overlaid histograms showing the distribution of scores for
29
- similar pairs (pos_data) and dissimilar pairs (neg_data), with a
30
- vertical line indicating the decision threshold.
31
-
32
- Args:
33
- pos_data: Scores for similar pairs (positive class)
34
- neg_data: Scores for dissimilar pairs (negative class)
35
- threshold: Decision threshold to display as vertical line
36
- method_name: Name of the comparison method (for plot title)
37
- output_path: Path where the plot should be saved
38
-
39
- Note:
40
- Uses 50 bins and 50% transparency for overlapping histograms.
41
- The plot is saved and closed automatically.
42
- """
43
- plt.figure(figsize=(10, 6))
44
- plt.hist(pos_data, bins=50, alpha=0.5, label="Similar", color="green")
45
- plt.hist(neg_data, bins=50, alpha=0.5, label="Dissimilar", color="red")
46
- plt.axvline(threshold, color="black", linestyle="--", label="Threshold")
47
- plt.xlabel("Score")
48
- plt.ylabel("Frequency")
49
- plt.title(f"{method_name} - Score Distribution")
50
- plt.legend()
51
- plt.tight_layout()
52
- plt.savefig(output_path)
53
- plt.close()
54
-
55
-
56
- def save_correlation_heatmap(
57
- corr_matrix: pd.DataFrame,
58
- output_path: Path,
59
- ) -> None:
60
- """Generate and save a correlation heatmap for method comparison.
61
-
62
- Creates a heatmap showing correlation coefficients between different
63
- comparison methods, using a diverging colormap centered at zero.
64
-
65
- Args:
66
- corr_matrix: Square correlation matrix (DataFrame with method names as index/columns)
67
- output_path: Path where the plot should be saved
68
-
69
- Note:
70
- Uses 'coolwarm' colormap with values clamped to [-1, 1] range.
71
- Correlation values are annotated on the heatmap.
72
- """
73
- plt.figure(figsize=(12, 10))
74
- plt.imshow(corr_matrix, cmap="coolwarm", vmin=-1, vmax=1, aspect="auto")
75
- plt.colorbar(label="Correlation")
76
- plt.xticks(range(len(corr_matrix.columns)), list(corr_matrix.columns), rotation=90)
77
- plt.yticks(range(len(corr_matrix.index)), list(corr_matrix.index))
78
- plt.title("Method Correlation Matrix")
79
-
80
- # Annotate correlation values
81
- for i in range(len(corr_matrix.index)):
82
- for j in range(len(corr_matrix.columns)):
83
- value = corr_matrix.iloc[i, j]
84
- plt.text(j, i, f"{value:.2f}", ha="center", va="center", color="black")
85
-
86
- plt.tight_layout()
87
- plt.savefig(output_path)
88
- plt.close()
89
-
90
-
91
- def save_pca_scatter(
92
- x_pca: NDArray[np.float64],
93
- y_true: NDArray[np.int_],
94
- explained_variance: list[float],
95
- output_path: Path,
96
- ) -> None:
97
- """Generate and save a PCA scatter plot with class coloring.
98
-
99
- Creates a 2D scatter plot of the first two principal components,
100
- with points colored by their true class labels.
101
-
102
- Args:
103
- x_pca: PCA-transformed coordinates (n_samples x n_components, uses first 2)
104
- y_true: True class labels (0 for dissimilar, 1 for similar)
105
- explained_variance: Variance explained by each principal component (uses first 2)
106
- output_path: Path where the plot should be saved
107
-
108
- Note:
109
- Axis labels include the percentage of variance explained.
110
- Similar pairs are shown in green, dissimilar in red.
111
- """
112
- plt.figure(figsize=(10, 8))
113
- for label, color, name in [(1, "green", "Similar"), (0, "red", "Dissimilar")]:
114
- mask = y_true == label
115
- plt.scatter(x_pca[mask, 0], x_pca[mask, 1], c=color, label=name, alpha=0.5)
116
-
117
- plt.xlabel(f"PC1 ({explained_variance[0]:.1%} variance)")
118
- plt.ylabel(f"PC2 ({explained_variance[1]:.1%} variance)")
119
- plt.title("PCA: Method Scores by True Label")
120
- plt.legend()
121
- plt.tight_layout()
122
- plt.savefig(output_path)
123
- plt.close()
1
+ """Plotting utilities for benchmark analysis and visualization.
2
+
3
+ This module provides standardized plotting functions that encapsulate
4
+ matplotlib configuration and boilerplate, ensuring consistent visualization
5
+ across the codebase.
6
+
7
+ All functions save plots directly to files rather than displaying them,
8
+ making them suitable for automated analysis pipelines.
9
+ """
10
+
11
+ from pathlib import Path
12
+
13
+ import matplotlib.pyplot as plt
14
+ import numpy as np
15
+ import pandas as pd
16
+ from numpy.typing import NDArray
17
+
18
+
19
+ def save_histogram_comparison(
20
+ pos_data: NDArray[np.float64],
21
+ neg_data: NDArray[np.float64],
22
+ threshold: float,
23
+ method_name: str,
24
+ output_path: Path,
25
+ ) -> None:
26
+ """Generate and save a histogram comparing similar/dissimilar distributions.
27
+
28
+ Creates overlaid histograms showing the distribution of scores for
29
+ similar pairs (pos_data) and dissimilar pairs (neg_data), with a
30
+ vertical line indicating the decision threshold.
31
+
32
+ Args:
33
+ pos_data: Scores for similar pairs (positive class)
34
+ neg_data: Scores for dissimilar pairs (negative class)
35
+ threshold: Decision threshold to display as vertical line
36
+ method_name: Name of the comparison method (for plot title)
37
+ output_path: Path where the plot should be saved
38
+
39
+ Note:
40
+ Uses 50 bins and 50% transparency for overlapping histograms.
41
+ The plot is saved and closed automatically.
42
+ """
43
+ plt.figure(figsize=(10, 6))
44
+ plt.hist(pos_data, bins=50, alpha=0.5, label="Similar", color="green")
45
+ plt.hist(neg_data, bins=50, alpha=0.5, label="Dissimilar", color="red")
46
+ plt.axvline(threshold, color="black", linestyle="--", label="Threshold")
47
+ plt.xlabel("Score")
48
+ plt.ylabel("Frequency")
49
+ plt.title(f"{method_name} - Score Distribution")
50
+ plt.legend()
51
+ plt.tight_layout()
52
+ plt.savefig(output_path)
53
+ plt.close()
54
+
55
+
56
+ def save_correlation_heatmap(
57
+ corr_matrix: pd.DataFrame,
58
+ output_path: Path,
59
+ ) -> None:
60
+ """Generate and save a correlation heatmap for method comparison.
61
+
62
+ Creates a heatmap showing correlation coefficients between different
63
+ comparison methods, using a diverging colormap centered at zero.
64
+
65
+ Args:
66
+ corr_matrix: Square correlation matrix (DataFrame with method names as index/columns)
67
+ output_path: Path where the plot should be saved
68
+
69
+ Note:
70
+ Uses 'coolwarm' colormap with values clamped to [-1, 1] range.
71
+ Correlation values are annotated on the heatmap.
72
+ """
73
+ plt.figure(figsize=(12, 10))
74
+ plt.imshow(corr_matrix, cmap="coolwarm", vmin=-1, vmax=1, aspect="auto")
75
+ plt.colorbar(label="Correlation")
76
+ plt.xticks(range(len(corr_matrix.columns)), list(corr_matrix.columns), rotation=90)
77
+ plt.yticks(range(len(corr_matrix.index)), list(corr_matrix.index))
78
+ plt.title("Method Correlation Matrix")
79
+
80
+ # Annotate correlation values
81
+ for i in range(len(corr_matrix.index)):
82
+ for j in range(len(corr_matrix.columns)):
83
+ value = corr_matrix.iloc[i, j]
84
+ plt.text(j, i, f"{value:.2f}", ha="center", va="center", color="black")
85
+
86
+ plt.tight_layout()
87
+ plt.savefig(output_path)
88
+ plt.close()
89
+
90
+
91
+ def save_pca_scatter(
92
+ x_pca: NDArray[np.float64],
93
+ y_true: NDArray[np.int_],
94
+ explained_variance: list[float],
95
+ output_path: Path,
96
+ ) -> None:
97
+ """Generate and save a PCA scatter plot with class coloring.
98
+
99
+ Creates a 2D scatter plot of the first two principal components,
100
+ with points colored by their true class labels.
101
+
102
+ Args:
103
+ x_pca: PCA-transformed coordinates (n_samples x n_components, uses first 2)
104
+ y_true: True class labels (0 for dissimilar, 1 for similar)
105
+ explained_variance: Variance explained by each principal component (uses first 2)
106
+ output_path: Path where the plot should be saved
107
+
108
+ Note:
109
+ Axis labels include the percentage of variance explained.
110
+ Similar pairs are shown in green, dissimilar in red.
111
+ """
112
+ plt.figure(figsize=(10, 8))
113
+ for label, color, name in [(1, "green", "Similar"), (0, "red", "Dissimilar")]:
114
+ mask = y_true == label
115
+ plt.scatter(x_pca[mask, 0], x_pca[mask, 1], c=color, label=name, alpha=0.5)
116
+
117
+ plt.xlabel(f"PC1 ({explained_variance[0]:.1%} variance)")
118
+ plt.ylabel(f"PC2 ({explained_variance[1]:.1%} variance)")
119
+ plt.title("PCA: Method Scores by True Label")
120
+ plt.legend()
121
+ plt.tight_layout()
122
+ plt.savefig(output_path)
123
+ plt.close()
utils/ports.py CHANGED
@@ -1,136 +1,136 @@
1
- """Port-based pipeline connectivity inspired by SystemC/Verilog.
2
-
3
- This module provides typed input/output ports for pipeline stages, enabling:
4
- - Type-safe connections between stages
5
- - Dependency tracking through timestamps
6
- - Decoupling stages from each other and from storage details
7
-
8
- Architecture:
9
- - InputPort: Typed input on a stage (like sc_in<T> in SystemC)
10
- - OutputPort: Typed output on a stage (like sc_out<T> in SystemC)
11
- - Ports connect stages without exposing cache paths or implementation
12
- """
13
-
14
- from __future__ import annotations
15
-
16
- from collections.abc import Callable
17
- from typing import TypeVar
18
-
19
- from .base_ports import BaseInputPort, BaseOutputPort, StageProtocol
20
-
21
- T = TypeVar("T")
22
-
23
-
24
- class OutputPort[T](BaseOutputPort):
25
- """Typed output port on a pipeline stage.
26
-
27
- An output port provides read access to a stage's output data.
28
- Multiple consumers can read from the same output port.
29
-
30
- The port uses a getter callback provided by the stage to access data.
31
- The port itself has no storage - it delegates to the stage's getter.
32
-
33
- Type parameter T specifies the data type this port produces,
34
- enabling compile-time type checking of connections.
35
-
36
- Analogous to: sc_out<T> in SystemC, output wire in Verilog
37
- """
38
-
39
- def __init__(
40
- self,
41
- owner: StageProtocol,
42
- getter: Callable[[], T],
43
- ):
44
- """Initialize output port.
45
-
46
- Args:
47
- owner: The stage that produces this output (used for timestamps)
48
- getter: Callable that returns the output data when called.
49
- Typically a lambda like: lambda: self.result
50
- or lambda: self.result['bins'] for partial data.
51
- The getter is called each time read() is invoked.
52
-
53
- Example:
54
- # Simple case - return entire result
55
- self.output_o = OutputPort(self, lambda: self.result)
56
-
57
- # Multiple ports exposing different parts
58
- self.bins_o = OutputPort(self, lambda: self.sha_bins)
59
- self.forest_o = OutputPort(self, lambda: self.forest)
60
- """
61
- super().__init__(owner)
62
- self.getter = getter
63
-
64
- def read(self) -> T:
65
- """Read output data from owning stage.
66
-
67
- Calls the getter callback to retrieve the current output data.
68
-
69
- Returns:
70
- Output data of type T
71
-
72
- Raises:
73
- Exception: If getter fails (e.g., stage hasn't run yet)
74
- """
75
- return self.getter()
76
-
77
-
78
- class InputPort[T](BaseInputPort):
79
- """Typed input port on a pipeline stage.
80
-
81
- An input port represents a dependency on another stage's output.
82
- It provides read access to upstream data without knowing which
83
- stage produces it or where it's stored.
84
-
85
- The port must be bound to an OutputPort before use. The binding
86
- enforces type compatibility through Generic[T].
87
-
88
- Type parameter T specifies the data type this port consumes,
89
- matching the OutputPort[T] it connects to.
90
-
91
- Analogous to: sc_in<T> in SystemC, input wire in Verilog
92
- """
93
-
94
- def __init__(self, name: str):
95
- """Initialize input port.
96
-
97
- Args:
98
- name: Descriptive name for this input (e.g., "sha_bins", "forest")
99
- """
100
- super().__init__(name)
101
- self._source: OutputPort[T] | None = None
102
-
103
- def bind(self, source: OutputPort[T]) -> None:
104
- """Bind this input to an output port.
105
-
106
- This connects the input to its data source. The Generic[T] type
107
- parameter ensures type compatibility at compile time.
108
-
109
- Args:
110
- source: The output port to read from
111
-
112
- Example:
113
- stage.forest_input.bind(prev_stage.forest_output)
114
- """
115
- self._source = source
116
-
117
- def read(self) -> T:
118
- """Read data from connected output port.
119
-
120
- Returns:
121
- Data of type T from the bound output port
122
-
123
- Raises:
124
- RuntimeError: If port is not bound to a source
125
- """
126
- if self._source is None:
127
- raise RuntimeError(f"Input port '{self.name}' is not bound to any source")
128
- return self._source.read()
129
-
130
- def is_bound(self) -> bool:
131
- """Check if this input is connected to a source.
132
-
133
- Returns:
134
- True if bind() has been called, False otherwise
135
- """
136
- return self._source is not None
1
+ """Port-based pipeline connectivity inspired by SystemC/Verilog.
2
+
3
+ This module provides typed input/output ports for pipeline stages, enabling:
4
+ - Type-safe connections between stages
5
+ - Dependency tracking through timestamps
6
+ - Decoupling stages from each other and from storage details
7
+
8
+ Architecture:
9
+ - InputPort: Typed input on a stage (like sc_in<T> in SystemC)
10
+ - OutputPort: Typed output on a stage (like sc_out<T> in SystemC)
11
+ - Ports connect stages without exposing cache paths or implementation
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Callable
17
+ from typing import TypeVar
18
+
19
+ from .base_ports import BaseInputPort, BaseOutputPort, StageProtocol
20
+
21
+ T = TypeVar("T")
22
+
23
+
24
+ class OutputPort[T](BaseOutputPort):
25
+ """Typed output port on a pipeline stage.
26
+
27
+ An output port provides read access to a stage's output data.
28
+ Multiple consumers can read from the same output port.
29
+
30
+ The port uses a getter callback provided by the stage to access data.
31
+ The port itself has no storage - it delegates to the stage's getter.
32
+
33
+ Type parameter T specifies the data type this port produces,
34
+ enabling compile-time type checking of connections.
35
+
36
+ Analogous to: sc_out<T> in SystemC, output wire in Verilog
37
+ """
38
+
39
+ def __init__(
40
+ self,
41
+ owner: StageProtocol,
42
+ getter: Callable[[], T],
43
+ ):
44
+ """Initialize output port.
45
+
46
+ Args:
47
+ owner: The stage that produces this output (used for timestamps)
48
+ getter: Callable that returns the output data when called.
49
+ Typically a lambda like: lambda: self.result
50
+ or lambda: self.result['bins'] for partial data.
51
+ The getter is called each time read() is invoked.
52
+
53
+ Example:
54
+ # Simple case - return entire result
55
+ self.output_o = OutputPort(self, lambda: self.result)
56
+
57
+ # Multiple ports exposing different parts
58
+ self.bins_o = OutputPort(self, lambda: self.sha_bins)
59
+ self.forest_o = OutputPort(self, lambda: self.forest)
60
+ """
61
+ super().__init__(owner)
62
+ self.getter = getter
63
+
64
+ def read(self) -> T:
65
+ """Read output data from owning stage.
66
+
67
+ Calls the getter callback to retrieve the current output data.
68
+
69
+ Returns:
70
+ Output data of type T
71
+
72
+ Raises:
73
+ Exception: If getter fails (e.g., stage hasn't run yet)
74
+ """
75
+ return self.getter()
76
+
77
+
78
+ class InputPort[T](BaseInputPort):
79
+ """Typed input port on a pipeline stage.
80
+
81
+ An input port represents a dependency on another stage's output.
82
+ It provides read access to upstream data without knowing which
83
+ stage produces it or where it's stored.
84
+
85
+ The port must be bound to an OutputPort before use. The binding
86
+ enforces type compatibility through Generic[T].
87
+
88
+ Type parameter T specifies the data type this port consumes,
89
+ matching the OutputPort[T] it connects to.
90
+
91
+ Analogous to: sc_in<T> in SystemC, input wire in Verilog
92
+ """
93
+
94
+ def __init__(self, name: str):
95
+ """Initialize input port.
96
+
97
+ Args:
98
+ name: Descriptive name for this input (e.g., "sha_bins", "forest")
99
+ """
100
+ super().__init__(name)
101
+ self._source: OutputPort[T] | None = None
102
+
103
+ def bind(self, source: OutputPort[T]) -> None:
104
+ """Bind this input to an output port.
105
+
106
+ This connects the input to its data source. The Generic[T] type
107
+ parameter ensures type compatibility at compile time.
108
+
109
+ Args:
110
+ source: The output port to read from
111
+
112
+ Example:
113
+ stage.forest_input.bind(prev_stage.forest_output)
114
+ """
115
+ self._source = source
116
+
117
+ def read(self) -> T:
118
+ """Read data from connected output port.
119
+
120
+ Returns:
121
+ Data of type T from the bound output port
122
+
123
+ Raises:
124
+ RuntimeError: If port is not bound to a source
125
+ """
126
+ if self._source is None:
127
+ raise RuntimeError(f"Input port '{self.name}' is not bound to any source")
128
+ return self._source.read()
129
+
130
+ def is_bound(self) -> bool:
131
+ """Check if this input is connected to a source.
132
+
133
+ Returns:
134
+ True if bind() has been called, False otherwise
135
+ """
136
+ return self._source is not None