photo-stack-finder 0.1.7-py3-none-any.whl → 0.1.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. orchestrator/__init__.py +2 -2
  2. orchestrator/app.py +6 -11
  3. orchestrator/build_pipeline.py +19 -21
  4. orchestrator/orchestrator_runner.py +11 -8
  5. orchestrator/pipeline_builder.py +126 -126
  6. orchestrator/pipeline_orchestrator.py +604 -604
  7. orchestrator/review_persistence.py +162 -162
  8. orchestrator/static/orchestrator.css +76 -76
  9. orchestrator/static/orchestrator.html +11 -5
  10. orchestrator/static/orchestrator.js +3 -1
  11. overlap_metrics/__init__.py +1 -1
  12. overlap_metrics/config.py +135 -135
  13. overlap_metrics/core.py +284 -284
  14. overlap_metrics/estimators.py +292 -292
  15. overlap_metrics/metrics.py +307 -307
  16. overlap_metrics/registry.py +99 -99
  17. overlap_metrics/utils.py +104 -104
  18. photo_compare/__init__.py +1 -1
  19. photo_compare/base.py +285 -285
  20. photo_compare/config.py +225 -225
  21. photo_compare/distance.py +15 -15
  22. photo_compare/feature_methods.py +173 -173
  23. photo_compare/file_hash.py +29 -29
  24. photo_compare/hash_methods.py +99 -99
  25. photo_compare/histogram_methods.py +118 -118
  26. photo_compare/pixel_methods.py +58 -58
  27. photo_compare/structural_methods.py +104 -104
  28. photo_compare/types.py +28 -28
  29. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
  30. photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
  31. scripts/orchestrate.py +12 -10
  32. utils/__init__.py +4 -3
  33. utils/base_pipeline_stage.py +171 -171
  34. utils/base_ports.py +176 -176
  35. utils/benchmark_utils.py +823 -823
  36. utils/channel.py +74 -74
  37. utils/comparison_gates.py +40 -21
  38. utils/compute_benchmarks.py +355 -355
  39. utils/compute_identical.py +94 -24
  40. utils/compute_indices.py +235 -235
  41. utils/compute_perceptual_hash.py +127 -127
  42. utils/compute_perceptual_match.py +240 -240
  43. utils/compute_sha_bins.py +64 -20
  44. utils/compute_template_similarity.py +1 -1
  45. utils/compute_versions.py +483 -483
  46. utils/config.py +8 -5
  47. utils/data_io.py +83 -83
  48. utils/graph_context.py +44 -44
  49. utils/logger.py +2 -2
  50. utils/models.py +2 -2
  51. utils/photo_file.py +90 -91
  52. utils/pipeline_graph.py +334 -334
  53. utils/pipeline_stage.py +408 -408
  54. utils/plot_helpers.py +123 -123
  55. utils/ports.py +136 -136
  56. utils/progress.py +415 -415
  57. utils/report_builder.py +139 -139
  58. utils/review_types.py +55 -55
  59. utils/review_utils.py +10 -19
  60. utils/sequence.py +10 -8
  61. utils/sequence_clustering.py +1 -1
  62. utils/template.py +57 -57
  63. utils/template_parsing.py +71 -0
  64. photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
  65. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
  66. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
  67. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
  68. {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
overlap_metrics/estimators.py
@@ -1,292 +1,292 @@ (the file was rewritten in place; the 0.1.7 and 0.1.8 sides of this hunk are identical as rendered, so the content appears once below)
"""Density estimator implementations for overlap_metrics library."""

from __future__ import annotations

from typing import cast

import numpy as np
import numpy.typing as npt
from scipy import stats

from .config import DEFAULTS, NUMERICS
from .core import DensityEstimatorBase
from .utils import validate_samples, validate_weights

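DensityEstimatorBase comes from .core, which this diff does not include. Inferring only from how the subclasses below use it, a hypothetical minimal stand-in for its contract might look like the following (the _fitted attribute and the error message are assumptions, not the package's actual code):

class DensityEstimatorBase:
    """Hypothetical stand-in for the base class defined in .core (not shown here)."""

    def __init__(self, name: str) -> None:
        self.name = name
        self._fitted = False

    def _mark_fitted(self) -> None:
        # Subclasses call this at the end of fit().
        self._fitted = True

    def _check_fitted(self) -> None:
        # Subclasses call this at the top of pdf().
        if not self._fitted:
            raise RuntimeError(f"{self.name}: call fit() before pdf()")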
class HistogramEstimator(DensityEstimatorBase):
    """Histogram-based density estimator with optional smoothing."""

    def __init__(self, n_bins: int = DEFAULTS.HIST_N_BINS, smooth: bool = DEFAULTS.HIST_SMOOTH):
        """Create histogram estimator.

        Args:
            n_bins: Number of histogram bins
            smooth: Apply Laplace smoothing if True
        """
        super().__init__(name="hist")
        self.n_bins: int = n_bins
        self.smooth: bool = smooth
        self._bin_edges: npt.NDArray[np.float64] | None = None
        self._bin_densities: npt.NDArray[np.float64] | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Fit histogram to samples."""
        validate_samples(samples, self.name)
        if weights is not None:
            validate_weights(weights, len(samples), self.name)

        # Create histogram
        counts: npt.NDArray[np.float64]
        bin_edges: npt.NDArray[np.float64]
        counts, bin_edges = np.histogram(
            samples,
            bins=self.n_bins,
            range=(NUMERICS.SCORE_MIN, NUMERICS.SCORE_MAX),
            weights=weights,
        )

        # Apply Laplace smoothing if requested
        if self.smooth:
            counts = counts + 1.0

        # Normalize to get density (integral = 1)
        bin_widths: npt.NDArray[np.float64] = np.diff(bin_edges)
        densities: npt.NDArray[np.float64] = counts / (counts.sum() * bin_widths)

        self._bin_edges = bin_edges
        self._bin_densities = densities
        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate PDF at points x using piecewise constant histogram."""
        self._check_fitted()
        assert self._bin_edges is not None  # Guaranteed by _check_fitted()
        assert self._bin_densities is not None  # Guaranteed by _check_fitted()

        # Find which bin each x falls into
        bin_indices: npt.NDArray[np.int_] = np.searchsorted(self._bin_edges[:-1], x, side="right") - 1

        # Clip to valid range
        bin_indices = np.clip(bin_indices, 0, len(self._bin_densities) - 1)

        # Return density for each bin
        return self._bin_densities[bin_indices]

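A minimal usage sketch for HistogramEstimator, assuming the wheel is installed and that NUMERICS.SCORE_MIN and NUMERICS.SCORE_MAX span [0, 1] (consistent with how scores are clipped throughout this file):

# Illustrative sketch (not from the package): fit a histogram density to
# synthetic scores in [0, 1] and check the PDF integrates to roughly 1.
import numpy as np

from overlap_metrics.estimators import HistogramEstimator

rng = np.random.default_rng(0)
scores = rng.beta(2.0, 5.0, size=1_000)  # synthetic similarity scores in [0, 1]

est = HistogramEstimator(n_bins=32, smooth=True)
est.fit(scores)

grid = np.linspace(0.0, 1.0, 513)
pdf = est.pdf(grid)
print(float(np.sum(pdf) * (grid[1] - grid[0])))  # Riemann sum, expect ~1.0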
class BetaEstimator(DensityEstimatorBase):
    """Beta distribution estimator using method of moments."""

    def __init__(self, eps: float = DEFAULTS.BETA_EPS):
        """Create Beta estimator.

        Args:
            eps: Small value to add/subtract from bounds to avoid numerical issues
        """
        super().__init__(name="beta")
        self.eps: float = eps
        self._alpha: float | None = None
        self._beta: float | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Fit Beta distribution using method of moments."""
        validate_samples(samples, self.name)
        w: npt.NDArray[np.float64] | None
        if weights is not None:
            validate_weights(weights, len(samples), self.name)
            # Normalize weights
            w = weights / weights.sum()
        else:
            w = None

        # Transform samples away from boundaries
        samples_trans: npt.NDArray[np.float64] = np.clip(
            samples, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps
        )

        # Compute weighted moments
        mean: float
        var: float
        if w is None:
            mean = float(np.mean(samples_trans))
            var = float(np.var(samples_trans))
        else:
            mean = float(np.sum(w * samples_trans))
            var = float(np.sum(w * (samples_trans - mean) ** 2))

        # Method of moments: solve for alpha, beta
        if var <= 0 or var >= mean * (1 - mean):
            # Fallback to uniform-ish distribution
            self._alpha = 1.0
            self._beta = 1.0
        else:
            common: float = mean * (1 - mean) / var - 1
            self._alpha = mean * common
            self._beta = (1 - mean) * common

        # Ensure positive parameters
        self._alpha = max(0.1, self._alpha)
        self._beta = max(0.1, self._beta)

        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate Beta PDF at points x."""
        self._check_fitted()
        assert self._alpha is not None and self._beta is not None  # Guaranteed by _check_fitted()

        # Clip to valid range
        x_clipped: npt.NDArray[np.float64] = np.clip(x, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps)

        # Use scipy's beta distribution
        return stats.beta.pdf(x_clipped, self._alpha, self._beta)

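The common factor is the closed-form method-of-moments solution: a Beta(alpha, beta) distribution has mean m = alpha / (alpha + beta) and variance v = m * (1 - m) / (alpha + beta + 1), so alpha + beta = m * (1 - m) / v - 1 (the common factor above), alpha = m * common, and beta = (1 - m) * common. A quick standalone sanity check of those equations, using only numpy and scipy:

# Illustrative check (not from the package): the moment equations recover
# alpha and beta from a Beta sample's mean and variance.
import numpy as np

rng = np.random.default_rng(1)
samples = rng.beta(3.0, 7.0, size=100_000)

mean = float(np.mean(samples))
var = float(np.var(samples))
common = mean * (1 - mean) / var - 1  # = alpha + beta
alpha, beta = mean * common, (1 - mean) * common
print(alpha, beta)  # expect values near (3.0, 7.0)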
class LogitKDEEstimator(DensityEstimatorBase):
    """KDE on logit-transformed [0,1] samples."""

    def __init__(self, eps: float = DEFAULTS.LOGIT_KDE_EPS):
        """Create Logit-KDE estimator.

        Args:
            eps: Small value to add/subtract from bounds before logit transform
        """
        super().__init__(name="logit_kde")
        self.eps: float = eps
        self._kde: stats.gaussian_kde | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Fit KDE to logit-transformed samples."""
        validate_samples(samples, self.name)
        if weights is not None:
            validate_weights(weights, len(samples), self.name)

        # Transform samples to avoid boundaries
        samples_trans: npt.NDArray[np.float64] = np.clip(
            samples, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps
        )

        # Apply logit transform: logit(p) = log(p / (1-p))
        logit_samples: npt.NDArray[np.float64] = np.log(samples_trans / (1 - samples_trans))

        # Fit Gaussian KDE in logit space
        self._kde = stats.gaussian_kde(logit_samples, weights=weights)
        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate PDF using change of variables from logit space."""
        self._check_fitted()
        assert self._kde is not None  # Guaranteed by _check_fitted()

        # Clip to valid range
        x_clipped: npt.NDArray[np.float64] = np.clip(x, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps)

        # Transform to logit space
        logit_x: npt.NDArray[np.float64] = np.log(x_clipped / (1 - x_clipped))

        # Evaluate KDE in logit space
        pdf_logit: npt.NDArray[np.float64] = cast(npt.NDArray[np.float64], self._kde(logit_x))

        # Apply Jacobian for change of variables: |d(logit(x))/dx| = 1/(x*(1-x))
        jacobian: npt.NDArray[np.float64] = 1.0 / (x_clipped * (1 - x_clipped))

        return pdf_logit * jacobian

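The pdf method rests on the change-of-variables identity: if y = log(x / (1 - x)) has density f_Y, then f_X(x) = f_Y(log(x / (1 - x))) / (x * (1 - x)). A standalone check of that identity using scipy directly (with an assumed eps standing in for DEFAULTS.LOGIT_KDE_EPS, whose value this diff does not show):

# Illustrative check (not from the package): a Gaussian KDE fitted in logit
# space, mapped back to [0, 1] with the 1/(x*(1-x)) Jacobian, still
# integrates to roughly 1.
import numpy as np
from scipy import stats

eps = 1e-6  # assumed stand-in for DEFAULTS.LOGIT_KDE_EPS
rng = np.random.default_rng(2)
samples = np.clip(rng.beta(2.0, 2.0, size=5_000), eps, 1 - eps)

kde = stats.gaussian_kde(np.log(samples / (1 - samples)))

x = np.linspace(eps, 1 - eps, 2_001)
pdf = kde(np.log(x / (1 - x))) / (x * (1 - x))
print(float(np.sum(pdf) * (x[1] - x[0])))  # Riemann sum, expect ~1.0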
class BetaMixtureEstimator(DensityEstimatorBase):
    """Beta mixture model with EM algorithm (placeholder implementation)."""

    def __init__(self, n_components: int = 2, max_iter: int = 100, eps: float = DEFAULTS.BETA_EPS):
        """Create Beta mixture estimator.

        Args:
            n_components: Number of mixture components
            max_iter: Maximum EM iterations
            eps: Small value for numerical stability
        """
        super().__init__(name="beta_mix")
        self.n_components: int = n_components
        self.max_iter: int = max_iter
        self.eps: float = eps
        self._weights: npt.NDArray[np.float64] | None = None
        self._alphas: npt.NDArray[np.float64] | None = None
        self._betas: npt.NDArray[np.float64] | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Fit Beta mixture using simple initialization (full EM not implemented)."""
        validate_samples(samples, self.name)
        if weights is not None:
            validate_weights(weights, len(samples), self.name)

        # Placeholder: fit single Beta as fallback
        # Full EM implementation would go here
        # random_state parameter reserved for future EM implementation

        # Transform samples away from boundaries
        samples_trans: npt.NDArray[np.float64] = np.clip(
            samples, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps
        )

        # Simple initialization: fit one component
        mean: float
        var: float
        if weights is None:
            mean = float(np.mean(samples_trans))
            var = float(np.var(samples_trans))
        else:
            w: npt.NDArray[np.float64] = weights / weights.sum()
            mean = float(np.sum(w * samples_trans))
            var = float(np.sum(w * (samples_trans - mean) ** 2))

        alpha: float
        beta: float
        if var <= 0 or var >= mean * (1 - mean):
            alpha, beta = 1.0, 1.0
        else:
            common: float = mean * (1 - mean) / var - 1
            alpha = max(0.1, mean * common)
            beta = max(0.1, (1 - mean) * common)

        # Store as single-component mixture
        self._weights = np.array([1.0])
        self._alphas = np.array([alpha])
        self._betas = np.array([beta])

        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate mixture PDF as weighted sum of component PDFs."""
        self._check_fitted()
        assert self._weights is not None  # Guaranteed by _check_fitted()
        assert self._alphas is not None  # Guaranteed by _check_fitted()
        assert self._betas is not None  # Guaranteed by _check_fitted()

        # Clip to valid range
        x_clipped: npt.NDArray[np.float64] = np.clip(x, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps)

        # Compute mixture: sum_k w_k * Beta(x; alpha_k, beta_k)
        pdf_vals: npt.NDArray[np.float64] = np.zeros_like(x_clipped)
        k: int
        for k in range(len(self._weights)):
            pdf_vals += self._weights[k] * stats.beta.pdf(x_clipped, self._alphas[k], self._betas[k])

        return pdf_vals
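Since the class above deliberately falls back to a single component, here is one hypothetical shape the reserved EM loop could take. Nothing below exists in the package; the M-step uses weighted method-of-moments refits (the same moment equations as BetaEstimator) rather than exact maximum likelihood, which is a common approximation for Beta mixtures:

# Hypothetical sketch only: an EM-style loop for an n-component Beta mixture
# with moment-matching M-steps. None of these names exist in the package.
import numpy as np
from scipy import stats


def fit_beta_mixture(x: np.ndarray, n_components: int = 2, max_iter: int = 100,
                     seed: int | None = None) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    rng = np.random.default_rng(seed)
    x = np.clip(x, 1e-6, 1 - 1e-6)
    # Random responsibilities to break symmetry, then alternate M/E steps.
    resp = rng.dirichlet(np.ones(n_components), size=len(x))
    weights = np.full(n_components, 1.0 / n_components)
    alphas = np.ones(n_components)
    betas = np.ones(n_components)
    for _ in range(max_iter):
        # M-step: weighted method of moments per component.
        for k in range(n_components):
            w = resp[:, k] / resp[:, k].sum()
            m = float(np.sum(w * x))
            v = float(np.sum(w * (x - m) ** 2))
            if v <= 0 or v >= m * (1 - m):
                alphas[k], betas[k] = 1.0, 1.0
            else:
                common = m * (1 - m) / v - 1
                alphas[k] = max(0.1, m * common)
                betas[k] = max(0.1, (1 - m) * common)
        weights = resp.mean(axis=0)
        # E-step: responsibilities proportional to weight_k * Beta pdf_k.
        dens = np.stack([weights[k] * stats.beta.pdf(x, alphas[k], betas[k])
                         for k in range(n_components)], axis=1)
        resp = dens / dens.sum(axis=1, keepdims=True)
    return weights, alphas, betas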