photo-stack-finder 0.1.7-py3-none-any.whl → 0.1.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/app.py +6 -11
  3. orchestrator/build_pipeline.py +19 -21
  4. orchestrator/orchestrator_runner.py +11 -8
  5. orchestrator/pipeline_builder.py +126 -126
  6. orchestrator/pipeline_orchestrator.py +604 -604
  7. orchestrator/review_persistence.py +162 -162
  8. orchestrator/static/orchestrator.css +76 -76
  9. orchestrator/static/orchestrator.html +11 -5
  10. orchestrator/static/orchestrator.js +3 -1
  11. overlap_metrics/__init__.py +1 -1
  12. overlap_metrics/config.py +135 -135
  13. overlap_metrics/core.py +284 -284
  14. overlap_metrics/estimators.py +292 -292
  15. overlap_metrics/metrics.py +307 -307
  16. overlap_metrics/registry.py +99 -99
  17. overlap_metrics/utils.py +104 -104
  18. photo_compare/__init__.py +1 -1
  19. photo_compare/base.py +285 -285
  20. photo_compare/config.py +225 -225
  21. photo_compare/distance.py +15 -15
  22. photo_compare/feature_methods.py +173 -173
  23. photo_compare/file_hash.py +29 -29
  24. photo_compare/hash_methods.py +99 -99
  25. photo_compare/histogram_methods.py +118 -118
  26. photo_compare/pixel_methods.py +58 -58
  27. photo_compare/structural_methods.py +104 -104
  28. photo_compare/types.py +28 -28
  29. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
  30. photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
  31. scripts/orchestrate.py +12 -10
  32. utils/__init__.py +4 -3
  33. utils/base_pipeline_stage.py +171 -171
  34. utils/base_ports.py +176 -176
  35. utils/benchmark_utils.py +823 -823
  36. utils/channel.py +74 -74
  37. utils/comparison_gates.py +40 -21
  38. utils/compute_benchmarks.py +355 -355
  39. utils/compute_identical.py +94 -24
  40. utils/compute_indices.py +235 -235
  41. utils/compute_perceptual_hash.py +127 -127
  42. utils/compute_perceptual_match.py +240 -240
  43. utils/compute_sha_bins.py +64 -20
  44. utils/compute_template_similarity.py +1 -1
  45. utils/compute_versions.py +483 -483
  46. utils/config.py +8 -5
  47. utils/data_io.py +83 -83
  48. utils/graph_context.py +44 -44
  49. utils/logger.py +2 -2
  50. utils/models.py +2 -2
  51. utils/photo_file.py +90 -91
  52. utils/pipeline_graph.py +334 -334
  53. utils/pipeline_stage.py +408 -408
  54. utils/plot_helpers.py +123 -123
  55. utils/ports.py +136 -136
  56. utils/progress.py +415 -415
  57. utils/report_builder.py +139 -139
  58. utils/review_types.py +55 -55
  59. utils/review_utils.py +10 -19
  60. utils/sequence.py +10 -8
  61. utils/sequence_clustering.py +1 -1
  62. utils/template.py +57 -57
  63. utils/template_parsing.py +71 -0
  64. photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
  65. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
  66. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
  67. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
  68. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
overlap_metrics/registry.py CHANGED
@@ -1,99 +1,99 @@
(the removed and added sides of this hunk render identically, so no content-level change is visible; the full-file rewrite is possibly a whitespace- or line-ending-only change, and the content is shown once below)

"""Registry and factory pattern for overlap_metrics library."""

from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass, field
from typing import Any, TypeVar

from .core import DensityEstimatorBase, EstimatorName, MetricBase, MetricName
from .estimators import BetaEstimator, BetaMixtureEstimator, HistogramEstimator, LogitKDEEstimator
from .metrics import (
    BhattacharyyaDistance,
    HellingerDistance,
    JensenShannon,
    KSStatistic,
    SeparationOVL,
    TotalVariation,
    Wasserstein1D,
)

T = TypeVar("T")


@dataclass
class Registry[T]:
    """Generic registry for factory pattern."""

    _ctors: dict[str, Callable[..., T]] = field(default_factory=dict)

    def register(self, name: str, ctor: Callable[..., T]) -> None:
        """Register a constructor function."""
        if name in self._ctors:
            raise ValueError(f"Duplicate registration: {name!r}")
        self._ctors[name] = ctor

    def create(self, name: str, **kwargs: Any) -> T:
        """Create instance by name with keyword arguments."""
        if name not in self._ctors:
            known_names: list[str] = sorted(self._ctors.keys())
            raise ValueError(f"Unknown key: {name!r}. Known: {known_names}")
        return self._ctors[name](**kwargs)

    def list_available(self) -> list[str]:
        """List all available registered names."""
        return sorted(self._ctors.keys())


# Create registry instances
estimator_registry: Registry[DensityEstimatorBase] = Registry()
metric_registry: Registry[MetricBase] = Registry()


def _populate_estimator_registry() -> None:
    """Populate estimator registry with default implementations."""
    estimator_registry.register(EstimatorName.HIST.value, lambda **kwargs: HistogramEstimator(**kwargs))
    estimator_registry.register(EstimatorName.BETA.value, lambda **kwargs: BetaEstimator(**kwargs))
    estimator_registry.register(EstimatorName.LOGIT_KDE.value, lambda **kwargs: LogitKDEEstimator(**kwargs))
    estimator_registry.register(EstimatorName.BETA_MIX.value, lambda **kwargs: BetaMixtureEstimator(**kwargs))


def _populate_metric_registry() -> None:
    """Populate metric registry with default implementations."""
    metric_registry.register(MetricName.SEPARATION_OVL.value, lambda: SeparationOVL())
    metric_registry.register(MetricName.BHATTACHARYYA_DISTANCE.value, lambda: BhattacharyyaDistance())
    metric_registry.register(MetricName.JENSEN_SHANNON.value, lambda: JensenShannon())
    metric_registry.register(MetricName.HELLINGER.value, lambda: HellingerDistance())
    metric_registry.register(MetricName.TOTAL_VARIATION.value, lambda: TotalVariation())
    metric_registry.register(MetricName.WASSERSTEIN_1D.value, lambda: Wasserstein1D())
    metric_registry.register(MetricName.KS_STAT.value, lambda: KSStatistic())


def create_estimator(name: EstimatorName, **kwargs: Any) -> DensityEstimatorBase:
    """Create density estimator by enum name.

    Args:
        name: EstimatorName enum value
        **kwargs: Constructor arguments for the estimator

    Returns:
        Configured estimator instance
    """
    return estimator_registry.create(name.value, **kwargs)


def create_metric(name: MetricName) -> MetricBase:
    """Create metric by enum name.

    Args:
        name: MetricName enum value

    Returns:
        Metric instance
    """
    return metric_registry.create(name.value)


# Populate registries on import
_populate_estimator_registry()
_populate_metric_registry()
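For orientation, everything in registry.py is consumed through the two factory helpers at the bottom of the file. A minimal usage sketch, assuming only the API visible in this diff (whether HistogramEstimator requires constructor kwargs is not shown here, so none are passed):

from overlap_metrics.core import EstimatorName, MetricName
from overlap_metrics.registry import create_estimator, create_metric, metric_registry

# The factories resolve enum values against the registries populated at import time.
estimator = create_estimator(EstimatorName.HIST)  # extra kwargs would be forwarded to the ctor
metric = create_metric(MetricName.JENSEN_SHANNON)

# Unknown names raise ValueError listing the known keys.
print(metric_registry.list_available())

Because register() rejects duplicate names eagerly, import-order mistakes surface immediately instead of one registration silently shadowing another.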
overlap_metrics/utils.py CHANGED
@@ -1,104 +1,104 @@
(as with registry.py above, the removed and added sides of this hunk render identically; the content is shown once below)

"""Utility functions for overlap_metrics library."""

from __future__ import annotations

from typing import cast

import numpy as np
from numpy.typing import NDArray

from .config import NUMERICS, VALIDATION


def make_grid(n_grid: int, mode: str) -> NDArray[np.float64]:
    """Create evaluation grid on [0,1] for numerical integration.

    Args:
        n_grid: Number of grid points
        mode: Grid type ('uniform' or 'edge_dense')

    Returns:
        Grid points as 1D array
    """
    if n_grid < 2:
        raise ValueError(f"Need at least 2 grid points, got {n_grid}")
    if n_grid > VALIDATION.MAX_GRID_SIZE:
        raise ValueError(f"Grid too large: {n_grid} > {VALIDATION.MAX_GRID_SIZE}")

    if mode == "uniform":
        return np.linspace(NUMERICS.SCORE_MIN, NUMERICS.SCORE_MAX, n_grid, dtype=NUMERICS.DTYPE_FLOAT)
    if mode == "edge_dense":
        # Dense grids near edges, coarser in middle
        quarter_size: int = n_grid // 4
        half_size: int = n_grid // 2
        remaining_size: int = n_grid - quarter_size - half_size

        left_edge: NDArray[np.float64] = np.linspace(0.0, 0.05, quarter_size, dtype=NUMERICS.DTYPE_FLOAT)
        middle: NDArray[np.float64] = np.linspace(0.05, 0.95, half_size, dtype=NUMERICS.DTYPE_FLOAT)
        right_edge: NDArray[np.float64] = np.linspace(0.95, 1.0, remaining_size, dtype=NUMERICS.DTYPE_FLOAT)

        return cast(NDArray[np.float64], np.unique(np.concatenate([left_edge, middle, right_edge])))
    raise ValueError(f"Unknown grid mode: {mode!r}")


def safe_log(x: NDArray[np.float64]) -> NDArray[np.float64]:
    """Compute logarithm with safe floor to prevent -inf."""
    return cast(NDArray[np.float64], np.log(np.maximum(x, NUMERICS.LOG_FLOOR)))


def safe_divide(numerator: NDArray[np.float64], denominator: NDArray[np.float64]) -> NDArray[np.float64]:
    """Safe division with floor on denominator."""
    return cast(NDArray[np.float64], numerator / np.maximum(denominator, NUMERICS.DIVISION_FLOOR))


def kl_divergence(p: NDArray[np.float64], q: NDArray[np.float64], xs: NDArray[np.float64]) -> float:
    """Compute KL divergence KL(p||q) = ∫ p log(p/q) dx with safe handling."""
    # Only compute where p > 0 to avoid 0*log(0) issues
    mask: NDArray[np.bool_] = p > NUMERICS.LOG_FLOOR
    p_safe: NDArray[np.float64] = p[mask]
    q_safe: NDArray[np.float64] = np.maximum(q[mask], NUMERICS.LOG_FLOOR)
    xs_safe: NDArray[np.float64] = xs[mask]  # Actual x coordinates of remaining points

    if len(p_safe) == 0:
        return 0.0

    log_ratio: NDArray[np.float64] = safe_log(p_safe) - safe_log(q_safe)
    integrand: NDArray[np.float64] = p_safe * log_ratio

    # Use trapezoidal rule with actual x coordinates
    return float(np.trapezoid(integrand, xs_safe))


def validate_samples(samples: NDArray[np.float64], name: str) -> None:
    """Validate sample array properties."""
    if samples.ndim != 1:
        raise ValueError(f"{name} samples must be 1D, got shape {samples.shape}")
    if len(samples) < VALIDATION.MIN_SAMPLES:
        raise ValueError(f"{name} needs at least {VALIDATION.MIN_SAMPLES} samples, got {len(samples)}")
    if not np.all(np.isfinite(samples)):
        raise ValueError(f"{name} samples contain non-finite values")
    if not np.all((samples >= NUMERICS.SCORE_MIN) & (samples <= NUMERICS.SCORE_MAX)):
        raise ValueError(f"{name} samples must be in [{NUMERICS.SCORE_MIN}, {NUMERICS.SCORE_MAX}]")


def validate_weights(weights: NDArray[np.float64], n_samples: int, name: str) -> None:
    """Validate weight array properties."""
    if weights.ndim != 1:
        raise ValueError(f"{name} weights must be 1D, got shape {weights.shape}")
    if len(weights) != n_samples:
        raise ValueError(f"{name} weights length {len(weights)} != samples length {n_samples}")
    if not np.all(weights >= 0):
        raise ValueError(f"{name} weights must be non-negative")
    if not np.all(np.isfinite(weights)):
        raise ValueError(f"{name} weights contain non-finite values")
    if np.sum(weights) <= 0:
        raise ValueError(f"{name} weights must have positive sum")


def check_pdf_normalization(estimator_name: str, integral_value: float) -> None:
    """Check that PDF integrates to approximately 1."""
    if abs(integral_value - 1.0) > NUMERICS.INTEGRAL_TOLERANCE:
        raise RuntimeError(
            f"PDF for {estimator_name} integrates to {integral_value:.6f}, "
            f"expected ~1.0 (tolerance {NUMERICS.INTEGRAL_TOLERANCE})"
        )
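A short sketch of how these utilities compose, assuming NUMERICS.SCORE_MIN/SCORE_MAX are 0 and 1 (consistent with the [0,1] grid documented in make_grid) and that 200 samples clears VALIDATION.MIN_SAMPLES; the SciPy beta densities are stand-ins for any two PDFs evaluated on a shared grid:

import numpy as np
from scipy.stats import beta

from overlap_metrics.utils import kl_divergence, make_grid, validate_samples

xs = make_grid(512, "edge_dense")   # extra resolution near 0 and 1
p = beta.pdf(xs, 2.0, 5.0)          # stand-in densities on the shared grid
q = beta.pdf(xs, 5.0, 2.0)
print(kl_divergence(p, q, xs))      # trapezoidal estimate of KL(p||q)

scores = np.random.default_rng(0).beta(2.0, 5.0, size=200)  # draws lie in (0, 1)
validate_samples(scores, "match scores")  # raises ValueError on bad input

Because kl_divergence integrates against the actual x coordinates, the non-uniform edge_dense grid needs no reweighting before the trapezoidal rule is applied.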
photo_compare/__init__.py CHANGED
@@ -43,7 +43,7 @@ from .structural_methods import HOGMethod, MultiScaleSSIMMethod, SSIMMethod
 
 # Version information
 __version__ = "1.0.0"
-__author__ = "Photo Deduplication Team"
+__author__ = "Photo Stack Finder Team"
 __description__ = "Image similarity methods with integrated caching and factory pattern"
 