photo-stack-finder 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orchestrator/__init__.py +2 -2
- orchestrator/app.py +6 -11
- orchestrator/build_pipeline.py +19 -21
- orchestrator/orchestrator_runner.py +11 -8
- orchestrator/pipeline_builder.py +126 -126
- orchestrator/pipeline_orchestrator.py +604 -604
- orchestrator/review_persistence.py +162 -162
- orchestrator/static/orchestrator.css +76 -76
- orchestrator/static/orchestrator.html +11 -5
- orchestrator/static/orchestrator.js +3 -1
- overlap_metrics/__init__.py +1 -1
- overlap_metrics/config.py +135 -135
- overlap_metrics/core.py +284 -284
- overlap_metrics/estimators.py +292 -292
- overlap_metrics/metrics.py +307 -307
- overlap_metrics/registry.py +99 -99
- overlap_metrics/utils.py +104 -104
- photo_compare/__init__.py +1 -1
- photo_compare/base.py +285 -285
- photo_compare/config.py +225 -225
- photo_compare/distance.py +15 -15
- photo_compare/feature_methods.py +173 -173
- photo_compare/file_hash.py +29 -29
- photo_compare/hash_methods.py +99 -99
- photo_compare/histogram_methods.py +118 -118
- photo_compare/pixel_methods.py +58 -58
- photo_compare/structural_methods.py +104 -104
- photo_compare/types.py +28 -28
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/METADATA +21 -22
- photo_stack_finder-0.1.8.dist-info/RECORD +75 -0
- scripts/orchestrate.py +12 -10
- utils/__init__.py +4 -3
- utils/base_pipeline_stage.py +171 -171
- utils/base_ports.py +176 -176
- utils/benchmark_utils.py +823 -823
- utils/channel.py +74 -74
- utils/comparison_gates.py +40 -21
- utils/compute_benchmarks.py +355 -355
- utils/compute_identical.py +94 -24
- utils/compute_indices.py +235 -235
- utils/compute_perceptual_hash.py +127 -127
- utils/compute_perceptual_match.py +240 -240
- utils/compute_sha_bins.py +64 -20
- utils/compute_template_similarity.py +1 -1
- utils/compute_versions.py +483 -483
- utils/config.py +8 -5
- utils/data_io.py +83 -83
- utils/graph_context.py +44 -44
- utils/logger.py +2 -2
- utils/models.py +2 -2
- utils/photo_file.py +90 -91
- utils/pipeline_graph.py +334 -334
- utils/pipeline_stage.py +408 -408
- utils/plot_helpers.py +123 -123
- utils/ports.py +136 -136
- utils/progress.py +415 -415
- utils/report_builder.py +139 -139
- utils/review_types.py +55 -55
- utils/review_utils.py +10 -19
- utils/sequence.py +10 -8
- utils/sequence_clustering.py +1 -1
- utils/template.py +57 -57
- utils/template_parsing.py +71 -0
- photo_stack_finder-0.1.7.dist-info/RECORD +0 -74
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/WHEEL +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/entry_points.txt +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/licenses/LICENSE +0 -0
- {photo_stack_finder-0.1.7.dist-info → photo_stack_finder-0.1.8.dist-info}/top_level.txt +0 -0
overlap_metrics/estimators.py
CHANGED
|
@@ -1,292 +1,292 @@
|
|
|
1
|
-
"""Density estimator implementations for overlap_metrics library."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from typing import cast
|
|
6
|
-
|
|
7
|
-
import numpy as np
|
|
8
|
-
import numpy.typing as npt
|
|
9
|
-
from scipy import stats
|
|
10
|
-
|
|
11
|
-
from .config import DEFAULTS, NUMERICS
|
|
12
|
-
from .core import DensityEstimatorBase
|
|
13
|
-
from .utils import validate_samples, validate_weights
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class HistogramEstimator(DensityEstimatorBase):
    """Piecewise-constant density estimate backed by a fixed-range histogram.

    The histogram always spans ``[NUMERICS.SCORE_MIN, NUMERICS.SCORE_MAX]``;
    one pseudo-count per bin can optionally be added before normalizing.
    """

    def __init__(self, n_bins: int = DEFAULTS.HIST_N_BINS, smooth: bool = DEFAULTS.HIST_SMOOTH):
        """Store the configuration; nothing is computed until ``fit`` runs.

        Args:
            n_bins: Bin count handed to ``np.histogram``.
            smooth: When True, add 1.0 to every bin count (Laplace smoothing).
        """
        super().__init__(name="hist")
        self.n_bins: int = n_bins
        self.smooth: bool = smooth
        # Both arrays stay None until fit() has populated them.
        self._bin_edges: npt.NDArray[np.float64] | None = None
        self._bin_densities: npt.NDArray[np.float64] | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Build the normalized histogram from ``samples``.

        Args:
            samples: Observations to bin.
            weights: Optional per-sample weights forwarded to ``np.histogram``.
            random_state: Accepted but not used by this estimator.
        """
        validate_samples(samples, self.name)
        if weights is not None:
            validate_weights(weights, len(samples), self.name)

        score_range = (NUMERICS.SCORE_MIN, NUMERICS.SCORE_MAX)
        hist: npt.NDArray[np.float64]
        edges: npt.NDArray[np.float64]
        hist, edges = np.histogram(samples, bins=self.n_bins, range=score_range, weights=weights)

        if self.smooth:
            # Laplace smoothing: one pseudo-count in every bin.
            hist = hist + 1.0

        # Divide by (total mass * bin width) so the step function integrates to 1.
        widths: npt.NDArray[np.float64] = np.diff(edges)
        total = hist.sum()

        self._bin_edges = edges
        self._bin_densities = hist / (total * widths)
        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Look up the fitted bin density for every point in ``x``.

        Points that fall left of the first edge or right of the last bin start
        are assigned the first or last bin's density, respectively.
        """
        self._check_fitted()
        edges = self._bin_edges
        heights = self._bin_densities
        assert edges is not None  # Guaranteed by _check_fitted()
        assert heights is not None  # Guaranteed by _check_fitted()

        # Searching the left edges with side="right" gives (bin index + 1).
        raw_index: npt.NDArray[np.int_] = np.searchsorted(edges[:-1], x, side="right") - 1
        last_bin = len(heights) - 1
        return heights[np.clip(raw_index, 0, last_bin)]
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
class BetaEstimator(DensityEstimatorBase):
    """Single Beta distribution fitted by matching the first two moments."""

    def __init__(self, eps: float = DEFAULTS.BETA_EPS):
        """Store the boundary margin; parameters are estimated in ``fit``.

        Args:
            eps: Margin kept between clipped values and the score bounds.
        """
        super().__init__(name="beta")
        self.eps: float = eps
        # Shape parameters; None until fit() has run.
        self._alpha: float | None = None
        self._beta: float | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Estimate alpha and beta from the (optionally weighted) moments.

        Args:
            samples: Observations to fit.
            weights: Optional per-sample weights; normalized to sum to 1.
            random_state: Accepted but not used by this estimator.
        """
        validate_samples(samples, self.name)
        norm_w: npt.NDArray[np.float64] | None = None
        if weights is not None:
            validate_weights(weights, len(samples), self.name)
            norm_w = weights / weights.sum()

        # Keep every sample strictly inside the score interval.
        lower = NUMERICS.SCORE_MIN + self.eps
        upper = NUMERICS.SCORE_MAX - self.eps
        clipped: npt.NDArray[np.float64] = np.clip(samples, lower, upper)

        if norm_w is None:
            mean = float(np.mean(clipped))
            var = float(np.var(clipped))
        else:
            mean = float(np.sum(norm_w * clipped))
            var = float(np.sum(norm_w * (clipped - mean) ** 2))

        alpha: float
        beta: float
        if var <= 0 or var >= mean * (1 - mean):
            # Moment equations have no valid solution: use Beta(1, 1).
            alpha, beta = 1.0, 1.0
        else:
            common: float = mean * (1 - mean) / var - 1
            alpha = mean * common
            beta = (1 - mean) * common

        # Floor both shape parameters at 0.1.
        self._alpha = max(0.1, alpha)
        self._beta = max(0.1, beta)

        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate the fitted Beta density at ``x`` after clipping to the interior."""
        self._check_fitted()
        assert self._alpha is not None and self._beta is not None  # Guaranteed by _check_fitted()

        lower = NUMERICS.SCORE_MIN + self.eps
        upper = NUMERICS.SCORE_MAX - self.eps
        inside: npt.NDArray[np.float64] = np.clip(x, lower, upper)
        return stats.beta.pdf(inside, self._alpha, self._beta)
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
class LogitKDEEstimator(DensityEstimatorBase):
    """Gaussian KDE fitted in logit space and mapped back to the score scale."""

    def __init__(self, eps: float = DEFAULTS.LOGIT_KDE_EPS):
        """Store the boundary margin; the KDE itself is built in ``fit``.

        Args:
            eps: Margin kept between clipped values and the score bounds so
                the logit transform stays finite.
        """
        super().__init__(name="logit_kde")
        self.eps: float = eps
        # The fitted scipy KDE; None until fit() has run.
        self._kde: stats.gaussian_kde | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Fit ``scipy.stats.gaussian_kde`` on the logit of the clipped samples.

        Args:
            samples: Observations to fit.
            weights: Optional per-sample weights forwarded to the KDE.
            random_state: Accepted but not used by this estimator.
        """
        validate_samples(samples, self.name)
        if weights is not None:
            validate_weights(weights, len(samples), self.name)

        lower = NUMERICS.SCORE_MIN + self.eps
        upper = NUMERICS.SCORE_MAX - self.eps
        bounded: npt.NDArray[np.float64] = np.clip(samples, lower, upper)

        # logit(p) = log(p / (1 - p))
        in_logit_space: npt.NDArray[np.float64] = np.log(bounded / (1 - bounded))

        self._kde = stats.gaussian_kde(in_logit_space, weights=weights)
        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate the density at ``x`` via a change of variables from logit space."""
        self._check_fitted()
        kde = self._kde
        assert kde is not None  # Guaranteed by _check_fitted()

        lower = NUMERICS.SCORE_MIN + self.eps
        upper = NUMERICS.SCORE_MAX - self.eps
        p: npt.NDArray[np.float64] = np.clip(x, lower, upper)
        complement = 1 - p

        # Density of logit(x) under the fitted KDE.
        z: npt.NDArray[np.float64] = np.log(p / complement)
        density_z: npt.NDArray[np.float64] = cast(npt.NDArray[np.float64], kde(z))

        # Change of variables: |d logit(x) / dx| = 1 / (x * (1 - x)).
        jac: npt.NDArray[np.float64] = 1.0 / (p * complement)

        return density_z * jac
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
class BetaMixtureEstimator(DensityEstimatorBase):
    """Beta mixture model (placeholder: currently fits a single component).

    ``fit`` does not run EM yet; it stores one moment-matched Beta component
    with weight 1.0. ``pdf`` is already written for any number of components.
    """

    def __init__(self, n_components: int = 2, max_iter: int = 100, eps: float = DEFAULTS.BETA_EPS):
        """Create Beta mixture estimator.

        Args:
            n_components: Number of mixture components (stored; not yet used by ``fit``)
            max_iter: Maximum EM iterations (stored; not yet used by ``fit``)
            eps: Margin kept between clipped values and the score bounds
        """
        super().__init__(name="beta_mix")
        self.n_components: int = n_components
        self.max_iter: int = max_iter
        self.eps: float = eps
        # Per-component mixture weights and Beta shape parameters; None until fit().
        self._weights: npt.NDArray[np.float64] | None = None
        self._alphas: npt.NDArray[np.float64] | None = None
        self._betas: npt.NDArray[np.float64] | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Fit a single moment-matched Beta component (full EM not implemented).

        Args:
            samples: Observations to fit.
            weights: Optional per-sample weights; normalized to sum to 1.
            random_state: Reserved for a future EM implementation; unused here.
        """
        validate_samples(samples, self.name)
        if weights is not None:
            validate_weights(weights, len(samples), self.name)

        # Keep every sample strictly inside the score interval.
        samples_trans: npt.NDArray[np.float64] = np.clip(
            samples, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps
        )

        # First two moments, weighted when weights are supplied.
        mean: float
        var: float
        if weights is None:
            mean = float(np.mean(samples_trans))
            var = float(np.var(samples_trans))
        else:
            w: npt.NDArray[np.float64] = weights / weights.sum()
            mean = float(np.sum(w * samples_trans))
            var = float(np.sum(w * (samples_trans - mean) ** 2))

        alpha: float
        beta: float
        if var <= 0 or var >= mean * (1 - mean):
            # Moment equations have no valid solution: use Beta(1, 1).
            alpha, beta = 1.0, 1.0
        else:
            common: float = mean * (1 - mean) / var - 1
            alpha = max(0.1, mean * common)
            beta = max(0.1, (1 - mean) * common)

        # Store as single-component mixture
        self._weights = np.array([1.0])
        self._alphas = np.array([alpha])
        self._betas = np.array([beta])

        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate mixture PDF as weighted sum of component PDFs."""
        self._check_fitted()
        assert self._weights is not None  # Guaranteed by _check_fitted()
        assert self._alphas is not None  # Guaranteed by _check_fitted()
        assert self._betas is not None  # Guaranteed by _check_fitted()

        # Clip to the interior of the score interval
        x_clipped: npt.NDArray[np.float64] = np.clip(x, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps)

        # Compute mixture: sum_k w_k * Beta(x; alpha_k, beta_k)
        pdf_vals: npt.NDArray[np.float64] = np.zeros_like(x_clipped)
        for weight, alpha, beta in zip(self._weights, self._alphas, self._betas):
            pdf_vals += weight * stats.beta.pdf(x_clipped, alpha, beta)

        return pdf_vals
|
|
1
|
+
"""Density estimator implementations for overlap_metrics library."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import cast
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import numpy.typing as npt
|
|
9
|
+
from scipy import stats
|
|
10
|
+
|
|
11
|
+
from .config import DEFAULTS, NUMERICS
|
|
12
|
+
from .core import DensityEstimatorBase
|
|
13
|
+
from .utils import validate_samples, validate_weights
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HistogramEstimator(DensityEstimatorBase):
    """Piecewise-constant density estimate backed by a fixed-range histogram.

    The histogram always spans ``[NUMERICS.SCORE_MIN, NUMERICS.SCORE_MAX]``;
    one pseudo-count per bin can optionally be added before normalizing.
    """

    def __init__(self, n_bins: int = DEFAULTS.HIST_N_BINS, smooth: bool = DEFAULTS.HIST_SMOOTH):
        """Store the configuration; nothing is computed until ``fit`` runs.

        Args:
            n_bins: Bin count handed to ``np.histogram``.
            smooth: When True, add 1.0 to every bin count (Laplace smoothing).
        """
        super().__init__(name="hist")
        self.n_bins: int = n_bins
        self.smooth: bool = smooth
        # Both arrays stay None until fit() has populated them.
        self._bin_edges: npt.NDArray[np.float64] | None = None
        self._bin_densities: npt.NDArray[np.float64] | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Build the normalized histogram from ``samples``.

        Args:
            samples: Observations to bin.
            weights: Optional per-sample weights forwarded to ``np.histogram``.
            random_state: Accepted but not used by this estimator.
        """
        validate_samples(samples, self.name)
        if weights is not None:
            validate_weights(weights, len(samples), self.name)

        score_range = (NUMERICS.SCORE_MIN, NUMERICS.SCORE_MAX)
        hist: npt.NDArray[np.float64]
        edges: npt.NDArray[np.float64]
        hist, edges = np.histogram(samples, bins=self.n_bins, range=score_range, weights=weights)

        if self.smooth:
            # Laplace smoothing: one pseudo-count in every bin.
            hist = hist + 1.0

        # Divide by (total mass * bin width) so the step function integrates to 1.
        widths: npt.NDArray[np.float64] = np.diff(edges)
        total = hist.sum()

        self._bin_edges = edges
        self._bin_densities = hist / (total * widths)
        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Look up the fitted bin density for every point in ``x``.

        Points that fall left of the first edge or right of the last bin start
        are assigned the first or last bin's density, respectively.
        """
        self._check_fitted()
        edges = self._bin_edges
        heights = self._bin_densities
        assert edges is not None  # Guaranteed by _check_fitted()
        assert heights is not None  # Guaranteed by _check_fitted()

        # Searching the left edges with side="right" gives (bin index + 1).
        raw_index: npt.NDArray[np.int_] = np.searchsorted(edges[:-1], x, side="right") - 1
        last_bin = len(heights) - 1
        return heights[np.clip(raw_index, 0, last_bin)]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class BetaEstimator(DensityEstimatorBase):
    """Single Beta distribution fitted by matching the first two moments."""

    def __init__(self, eps: float = DEFAULTS.BETA_EPS):
        """Store the boundary margin; parameters are estimated in ``fit``.

        Args:
            eps: Margin kept between clipped values and the score bounds.
        """
        super().__init__(name="beta")
        self.eps: float = eps
        # Shape parameters; None until fit() has run.
        self._alpha: float | None = None
        self._beta: float | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Estimate alpha and beta from the (optionally weighted) moments.

        Args:
            samples: Observations to fit.
            weights: Optional per-sample weights; normalized to sum to 1.
            random_state: Accepted but not used by this estimator.
        """
        validate_samples(samples, self.name)
        norm_w: npt.NDArray[np.float64] | None = None
        if weights is not None:
            validate_weights(weights, len(samples), self.name)
            norm_w = weights / weights.sum()

        # Keep every sample strictly inside the score interval.
        lower = NUMERICS.SCORE_MIN + self.eps
        upper = NUMERICS.SCORE_MAX - self.eps
        clipped: npt.NDArray[np.float64] = np.clip(samples, lower, upper)

        if norm_w is None:
            mean = float(np.mean(clipped))
            var = float(np.var(clipped))
        else:
            mean = float(np.sum(norm_w * clipped))
            var = float(np.sum(norm_w * (clipped - mean) ** 2))

        alpha: float
        beta: float
        if var <= 0 or var >= mean * (1 - mean):
            # Moment equations have no valid solution: use Beta(1, 1).
            alpha, beta = 1.0, 1.0
        else:
            common: float = mean * (1 - mean) / var - 1
            alpha = mean * common
            beta = (1 - mean) * common

        # Floor both shape parameters at 0.1.
        self._alpha = max(0.1, alpha)
        self._beta = max(0.1, beta)

        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate the fitted Beta density at ``x`` after clipping to the interior."""
        self._check_fitted()
        assert self._alpha is not None and self._beta is not None  # Guaranteed by _check_fitted()

        lower = NUMERICS.SCORE_MIN + self.eps
        upper = NUMERICS.SCORE_MAX - self.eps
        inside: npt.NDArray[np.float64] = np.clip(x, lower, upper)
        return stats.beta.pdf(inside, self._alpha, self._beta)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class LogitKDEEstimator(DensityEstimatorBase):
    """Gaussian KDE fitted in logit space and mapped back to the score scale."""

    def __init__(self, eps: float = DEFAULTS.LOGIT_KDE_EPS):
        """Store the boundary margin; the KDE itself is built in ``fit``.

        Args:
            eps: Margin kept between clipped values and the score bounds so
                the logit transform stays finite.
        """
        super().__init__(name="logit_kde")
        self.eps: float = eps
        # The fitted scipy KDE; None until fit() has run.
        self._kde: stats.gaussian_kde | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Fit ``scipy.stats.gaussian_kde`` on the logit of the clipped samples.

        Args:
            samples: Observations to fit.
            weights: Optional per-sample weights forwarded to the KDE.
            random_state: Accepted but not used by this estimator.
        """
        validate_samples(samples, self.name)
        if weights is not None:
            validate_weights(weights, len(samples), self.name)

        lower = NUMERICS.SCORE_MIN + self.eps
        upper = NUMERICS.SCORE_MAX - self.eps
        bounded: npt.NDArray[np.float64] = np.clip(samples, lower, upper)

        # logit(p) = log(p / (1 - p))
        in_logit_space: npt.NDArray[np.float64] = np.log(bounded / (1 - bounded))

        self._kde = stats.gaussian_kde(in_logit_space, weights=weights)
        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate the density at ``x`` via a change of variables from logit space."""
        self._check_fitted()
        kde = self._kde
        assert kde is not None  # Guaranteed by _check_fitted()

        lower = NUMERICS.SCORE_MIN + self.eps
        upper = NUMERICS.SCORE_MAX - self.eps
        p: npt.NDArray[np.float64] = np.clip(x, lower, upper)
        complement = 1 - p

        # Density of logit(x) under the fitted KDE.
        z: npt.NDArray[np.float64] = np.log(p / complement)
        density_z: npt.NDArray[np.float64] = cast(npt.NDArray[np.float64], kde(z))

        # Change of variables: |d logit(x) / dx| = 1 / (x * (1 - x)).
        jac: npt.NDArray[np.float64] = 1.0 / (p * complement)

        return density_z * jac
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class BetaMixtureEstimator(DensityEstimatorBase):
    """Beta mixture model (placeholder: currently fits a single component).

    ``fit`` does not run EM yet; it stores one moment-matched Beta component
    with weight 1.0. ``pdf`` is already written for any number of components.
    """

    def __init__(self, n_components: int = 2, max_iter: int = 100, eps: float = DEFAULTS.BETA_EPS):
        """Create Beta mixture estimator.

        Args:
            n_components: Number of mixture components (stored; not yet used by ``fit``)
            max_iter: Maximum EM iterations (stored; not yet used by ``fit``)
            eps: Margin kept between clipped values and the score bounds
        """
        super().__init__(name="beta_mix")
        self.n_components: int = n_components
        self.max_iter: int = max_iter
        self.eps: float = eps
        # Per-component mixture weights and Beta shape parameters; None until fit().
        self._weights: npt.NDArray[np.float64] | None = None
        self._alphas: npt.NDArray[np.float64] | None = None
        self._betas: npt.NDArray[np.float64] | None = None

    def fit(
        self,
        samples: npt.NDArray[np.float64],
        weights: npt.NDArray[np.float64] | None = None,
        random_state: int | None = None,
    ) -> None:
        """Fit a single moment-matched Beta component (full EM not implemented).

        Args:
            samples: Observations to fit.
            weights: Optional per-sample weights; normalized to sum to 1.
            random_state: Reserved for a future EM implementation; unused here.
        """
        validate_samples(samples, self.name)
        if weights is not None:
            validate_weights(weights, len(samples), self.name)

        # Keep every sample strictly inside the score interval.
        samples_trans: npt.NDArray[np.float64] = np.clip(
            samples, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps
        )

        # First two moments, weighted when weights are supplied.
        mean: float
        var: float
        if weights is None:
            mean = float(np.mean(samples_trans))
            var = float(np.var(samples_trans))
        else:
            w: npt.NDArray[np.float64] = weights / weights.sum()
            mean = float(np.sum(w * samples_trans))
            var = float(np.sum(w * (samples_trans - mean) ** 2))

        alpha: float
        beta: float
        if var <= 0 or var >= mean * (1 - mean):
            # Moment equations have no valid solution: use Beta(1, 1).
            alpha, beta = 1.0, 1.0
        else:
            common: float = mean * (1 - mean) / var - 1
            alpha = max(0.1, mean * common)
            beta = max(0.1, (1 - mean) * common)

        # Store as single-component mixture
        self._weights = np.array([1.0])
        self._alphas = np.array([alpha])
        self._betas = np.array([beta])

        self._mark_fitted()

    def pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
        """Evaluate mixture PDF as weighted sum of component PDFs."""
        self._check_fitted()
        assert self._weights is not None  # Guaranteed by _check_fitted()
        assert self._alphas is not None  # Guaranteed by _check_fitted()
        assert self._betas is not None  # Guaranteed by _check_fitted()

        # Clip to the interior of the score interval
        x_clipped: npt.NDArray[np.float64] = np.clip(x, NUMERICS.SCORE_MIN + self.eps, NUMERICS.SCORE_MAX - self.eps)

        # Compute mixture: sum_k w_k * Beta(x; alpha_k, beta_k)
        pdf_vals: npt.NDArray[np.float64] = np.zeros_like(x_clipped)
        for weight, alpha, beta in zip(self._weights, self._alphas, self._betas):
            pdf_vals += weight * stats.beta.pdf(x_clipped, alpha, beta)

        return pdf_vals
|