derivkit-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- derivkit/__init__.py +22 -0
- derivkit/calculus/__init__.py +17 -0
- derivkit/calculus/calculus_core.py +152 -0
- derivkit/calculus/gradient.py +97 -0
- derivkit/calculus/hessian.py +528 -0
- derivkit/calculus/hyper_hessian.py +296 -0
- derivkit/calculus/jacobian.py +156 -0
- derivkit/calculus_kit.py +128 -0
- derivkit/derivative_kit.py +315 -0
- derivkit/derivatives/__init__.py +6 -0
- derivkit/derivatives/adaptive/__init__.py +5 -0
- derivkit/derivatives/adaptive/adaptive_fit.py +238 -0
- derivkit/derivatives/adaptive/batch_eval.py +179 -0
- derivkit/derivatives/adaptive/diagnostics.py +325 -0
- derivkit/derivatives/adaptive/grid.py +333 -0
- derivkit/derivatives/adaptive/polyfit_utils.py +513 -0
- derivkit/derivatives/adaptive/spacing.py +66 -0
- derivkit/derivatives/adaptive/transforms.py +245 -0
- derivkit/derivatives/autodiff/__init__.py +1 -0
- derivkit/derivatives/autodiff/jax_autodiff.py +95 -0
- derivkit/derivatives/autodiff/jax_core.py +217 -0
- derivkit/derivatives/autodiff/jax_utils.py +146 -0
- derivkit/derivatives/finite/__init__.py +5 -0
- derivkit/derivatives/finite/batch_eval.py +91 -0
- derivkit/derivatives/finite/core.py +84 -0
- derivkit/derivatives/finite/extrapolators.py +511 -0
- derivkit/derivatives/finite/finite_difference.py +247 -0
- derivkit/derivatives/finite/stencil.py +206 -0
- derivkit/derivatives/fornberg.py +245 -0
- derivkit/derivatives/local_polynomial_derivative/__init__.py +1 -0
- derivkit/derivatives/local_polynomial_derivative/diagnostics.py +90 -0
- derivkit/derivatives/local_polynomial_derivative/fit.py +199 -0
- derivkit/derivatives/local_polynomial_derivative/local_poly_config.py +95 -0
- derivkit/derivatives/local_polynomial_derivative/local_polynomial_derivative.py +205 -0
- derivkit/derivatives/local_polynomial_derivative/sampling.py +72 -0
- derivkit/derivatives/tabulated_model/__init__.py +1 -0
- derivkit/derivatives/tabulated_model/one_d.py +247 -0
- derivkit/forecast_kit.py +783 -0
- derivkit/forecasting/__init__.py +1 -0
- derivkit/forecasting/dali.py +78 -0
- derivkit/forecasting/expansions.py +486 -0
- derivkit/forecasting/fisher.py +298 -0
- derivkit/forecasting/fisher_gaussian.py +171 -0
- derivkit/forecasting/fisher_xy.py +357 -0
- derivkit/forecasting/forecast_core.py +313 -0
- derivkit/forecasting/getdist_dali_samples.py +429 -0
- derivkit/forecasting/getdist_fisher_samples.py +235 -0
- derivkit/forecasting/laplace.py +259 -0
- derivkit/forecasting/priors_core.py +860 -0
- derivkit/forecasting/sampling_utils.py +388 -0
- derivkit/likelihood_kit.py +114 -0
- derivkit/likelihoods/__init__.py +1 -0
- derivkit/likelihoods/gaussian.py +136 -0
- derivkit/likelihoods/poisson.py +176 -0
- derivkit/utils/__init__.py +13 -0
- derivkit/utils/concurrency.py +213 -0
- derivkit/utils/extrapolation.py +254 -0
- derivkit/utils/linalg.py +513 -0
- derivkit/utils/logger.py +26 -0
- derivkit/utils/numerics.py +262 -0
- derivkit/utils/sandbox.py +74 -0
- derivkit/utils/types.py +15 -0
- derivkit/utils/validate.py +811 -0
- derivkit-1.0.0.dist-info/METADATA +50 -0
- derivkit-1.0.0.dist-info/RECORD +68 -0
- derivkit-1.0.0.dist-info/WHEEL +5 -0
- derivkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- derivkit-1.0.0.dist-info/top_level.txt +1 -0
derivkit/forecasting/sampling_utils.py

@@ -0,0 +1,388 @@
"""Gaussian sampling-kernel helpers derived from a Fisher matrix.

This module implements a Fisher-based Gaussian sampling distribution
``q`` centered at ``theta0`` with covariance::

    (kernel_scale**2) * pinv(F)

where ``F`` is the Fisher information matrix evaluated at ``theta0``.
In this approximation, ``F`` is the Hessian of ``-log L`` at ``theta0`` and
acts as the local inverse covariance.

This kernel is used to generate candidate points and, for importance sampling,
to evaluate ``log(q)`` when sampling posteriors approximated with Fisher or DALI.

For importance sampling, ``log(q)`` is subtracted from the target log-posterior
to form log-weights.

It provides:

- construction of Fisher-based sampling covariances,
- stabilized Cholesky factorization for near-singular kernels,
- sampling and log-density evaluation of the kernel,
- fast bounds rejection masks,
- MCMC walker initialization under sampler bounds.

These utilities do not depend on a specific sampler implementation.
They provide kernel draws, log-densities, and bounds filtering that are
called by the GetDist importance-sampling wrappers and by the emcee walker
initialization routine.
"""

from __future__ import annotations

from typing import Sequence

import numpy as np
from numpy.typing import NDArray

from derivkit.utils.linalg import solve_or_pinv
from derivkit.utils.validate import (
    validate_fisher_shape,
    validate_square_matrix,
)

__all__ = [
    "kernel_cov_from_fisher",
    "stabilized_cholesky",
    "kernel_samples_from_fisher",
    "apply_parameter_bounds",
    "log_gaussian_kernel",
    "init_walkers_from_fisher",
    "fisher_to_cov",
]


def kernel_cov_from_fisher(
    fisher: NDArray[np.floating],
    *,
    kernel_scale: float,
) -> NDArray[np.float64]:
    """Returns the covariance of the Fisher-based Gaussian sampling kernel.

    This is a thin wrapper around
    :func:`derivkit.forecasting.sampling_utils.fisher_to_cov` that converts
    ``fisher`` to a covariance via pseudoinverse and applies the scaling
    ``kernel_scale^2``.

    The Fisher matrix is treated as the Hessian of ``-log L`` at ``theta0``.
    In this local approximation it acts as an inverse covariance, and
    ``kernel_scale`` controls the overall kernel width.
    A pseudoinverse is used so the covariance is defined even if ``fisher``
    is singular.

    Args:
        fisher: Fisher information matrix with shape ``(p, p)``.
        kernel_scale: Multiplicative scale factor applied to the covariance.
            Increasing values widen the kernel; decreasing values narrow it.

    Returns:
        Kernel covariance matrix with shape ``(p, p)``, equal to
        ``(kernel_scale^2) * fisher_to_cov(fisher)``.

    Raises:
        ValueError: If ``fisher`` is not a square 2D array.
    """
    kernel_cov = fisher_to_cov(fisher)
    return (float(kernel_scale) ** 2) * kernel_cov


def stabilized_cholesky(cov: NDArray[np.floating]) -> NDArray[np.float64]:
    """Returns a Cholesky factor of a covariance matrix.

    This function computes a lower-triangular matrix ``L`` such that ``cov`` is
    approximately ``L @ L.T``, even when ``cov`` is nearly singular or only
    positive semi-definite.

    Args:
        cov: Covariance matrix with shape ``(p, p)``, where ``p`` is the
            number of parameters.

    Returns:
        A lower-triangular Cholesky factor ``L`` of the regularized covariance.

    Raises:
        ValueError: If ``cov`` is not a square 2D array.
    """
    cov_matrix = np.asarray(cov, dtype=float)

    if cov_matrix.ndim != 2 or cov_matrix.shape[0] != cov_matrix.shape[1]:
        raise ValueError(f"cov must be square 2D, got {cov_matrix.shape}")

    n_params = cov_matrix.shape[0]
    trace_cov = float(np.trace(cov_matrix))
    regularization_scale = max(trace_cov, 1.0)
    jitter = 1e-12 * regularization_scale / max(n_params, 1)

    regularized_cov = cov_matrix + jitter * np.eye(n_params)
    cholesky_factor = np.linalg.cholesky(regularized_cov)
    return cholesky_factor


def kernel_samples_from_fisher(
    theta0: NDArray[np.floating],
    fisher: NDArray[np.floating],
    *,
    n_samples: int,
    kernel_scale: float,
    seed: int | None,
) -> NDArray[np.float64]:
    """Draws samples from a Fisher-based Gaussian sampling kernel.

    Samples are drawn from the Gaussian kernel density ``q(theta)`` with mean
    ``theta0`` and covariance ``(kernel_scale^2) * pinv(F)``::

        q(theta) = Normal(mean=theta0, cov=(kernel_scale^2) * pinv(F))

    ``F`` is the Fisher information matrix evaluated at ``theta0``.
    In this approximation, ``F`` acts as an inverse covariance and
    ``Normal`` denotes the multivariate normal distribution.
    A pseudoinverse is used so the covariance is defined even if
    ``F`` is singular or ill-conditioned.

    Args:
        theta0: Kernel mean with shape ``(p,)``, where ``p`` is the number
            of parameters.
        fisher: Fisher information matrix with shape ``(p, p)``.
        n_samples: Number of samples to draw.
        kernel_scale: Multiplicative scale applied to the covariance.
            Increasing values widen the kernel; decreasing values narrow it.
        seed: Optional random seed for reproducible draws.

    Returns:
        Array of samples with shape ``(n_samples, p)``.
    """
    rng = np.random.default_rng(seed)

    theta0_vec = np.asarray(theta0, dtype=float)
    fisher_matrix = np.asarray(fisher, dtype=float)
    validate_fisher_shape(theta0_vec, fisher_matrix)

    kernel_cov = kernel_cov_from_fisher(fisher_matrix, kernel_scale=float(kernel_scale))
    chol_lower = stabilized_cholesky(kernel_cov)

    n_params = theta0_vec.size
    standard_normals = rng.standard_normal((int(n_samples), n_params))

    kernel_mean = theta0_vec[None, :]
    noise = (chol_lower @ standard_normals.T).T
    return kernel_mean + noise


def apply_parameter_bounds(
    samples: NDArray[np.floating],
    parameter_bounds: Sequence[tuple[float | None, float | None]] | None,
) -> NDArray[np.float64]:
    """Applies per-parameter bounds to a set of samples.

    This function performs a fast, axis-aligned rejection step: any sample with
    at least one parameter value outside the specified bounds is discarded.
    It does not evaluate priors or compute weights.

    Args:
        samples: Sample array with shape ``(n_samples, p)``, where ``p`` is
            the number of parameters.
        parameter_bounds: Optional sequence of ``(lower, upper)`` bounds, one
            per parameter. Use ``None`` to indicate an unbounded side
            (e.g. ``(0.0, None)``). If ``None``, no filtering is applied.

    Returns:
        Samples satisfying all bounds, with shape ``(n_kept, p)``.
        ``n_kept`` may be zero.

    Raises:
        ValueError: If ``parameter_bounds`` is provided but does not have
            length ``p``.
    """
    sample_array = np.asarray(samples, dtype=np.float64)

    if parameter_bounds is None:
        return sample_array

    n_params = sample_array.shape[1]
    if len(parameter_bounds) != n_params:
        raise ValueError(
            f"parameter_bounds must have length {n_params}; got {len(parameter_bounds)}"
        )

    keep_mask = np.ones(sample_array.shape[0], dtype=bool)
    for param_index, (lower, upper) in enumerate(parameter_bounds):
        if lower is not None:
            keep_mask &= sample_array[:, param_index] >= lower
        if upper is not None:
            keep_mask &= sample_array[:, param_index] <= upper

    return sample_array[keep_mask]


def log_gaussian_kernel(
    samples: NDArray[np.floating],
    theta0: NDArray[np.floating],
    fisher: NDArray[np.floating],
    *,
    kernel_scale: float,
) -> NDArray[np.float64]:
    """Log-density of a Fisher-based Gaussian sampling kernel.

    Defines a Gaussian kernel ``q`` with mean ``theta0`` and covariance::

        (kernel_scale^2) * pinv(F)

    where ``F`` is the Fisher information matrix. The covariance is formed with a
    pseudoinverse (allowing singular or ill-conditioned ``F``). A small diagonal
    jitter is added before evaluating the log-density so the calculation remains
    well-defined for near-singular covariances.

    Args:
        samples: Sample locations with shape ``(n_samples, p)``.
        theta0: Kernel mean with shape ``(p,)``.
        fisher: Fisher information matrix with shape ``(p, p)``.
        kernel_scale: Multiplicative scale applied to the covariance.

    Returns:
        Log of the kernel probability density ``log(q(theta))``, evaluated at
        each sample point, with shape ``(n_samples,)``.

    Raises:
        ValueError: If input shapes are incompatible.
        RuntimeError: If the jittered covariance is not positive-definite.
    """
    theta0_vec = np.asarray(theta0, dtype=float)
    fisher_matrix = np.asarray(fisher, dtype=float)
    validate_fisher_shape(theta0_vec, fisher_matrix)
    sample_array = np.asarray(samples, dtype=np.float64)

    if sample_array.ndim != 2:
        raise ValueError(
            "samples must be 2D with shape (n_samples, p); "
            f"got shape {sample_array.shape}."
        )

    if sample_array.shape[1] != theta0_vec.size:
        raise ValueError(
            f"samples must have p={theta0_vec.size} columns; got {sample_array.shape[1]}."
        )

    cov_matrix = kernel_cov_from_fisher(fisher_matrix, kernel_scale=float(kernel_scale))

    n_params = theta0_vec.size
    trace_cov = float(np.trace(cov_matrix))
    trace_scale = max(trace_cov, 1.0)
    jitter = 1e-12 * trace_scale / max(n_params, 1)
    cov_matrix = cov_matrix + jitter * np.eye(n_params)

    sign, logdet = np.linalg.slogdet(cov_matrix)
    if sign <= 0 or not np.isfinite(logdet):
        raise RuntimeError("Kernel covariance is not positive-definite after adding jitter.")

    centered = sample_array - theta0_vec[None, :]
    solved = solve_or_pinv(
        cov_matrix,
        centered.T,
        rcond=1e-12,
        assume_symmetric=True,
        warn_context="log_gaussian_kernel",
    )

    quad_form = np.einsum("ij,ij->j", centered.T, solved)
    norm_const = n_params * np.log(2.0 * np.pi) + logdet
    log_gauss_density = -0.5 * (quad_form + norm_const)
    return log_gauss_density


def init_walkers_from_fisher(
    theta0: NDArray[np.floating],
    fisher: NDArray[np.floating],
    *,
    n_walkers: int,
    init_scale: float,
    seed: int | None,
    sampler_bounds: Sequence[tuple[float | None, float | None]] | None,
) -> NDArray[np.float64]:
    """Returns initial MCMC walker positions from a Fisher-based Gaussian kernel.

    Returns an array of walker positions centered at ``theta0`` with scatter set
    by a Fisher-derived covariance ``(init_scale^2) * pinv(F)``. If
    ``sampler_bounds`` are provided, positions outside the bounds are rejected
    and additional candidates are generated until ``n_walkers`` positions are
    collected or a retry limit is reached.

    Args:
        theta0: Kernel mean with shape ``(p,)``, where ``p`` is the number
            of parameters.
        fisher: Fisher information matrix with shape ``(p, p)``.
        n_walkers: Number of walker positions to return.
        init_scale: Multiplicative scale applied to the kernel covariance.
        seed: Optional random seed for reproducible initialization.
        sampler_bounds: Optional per-parameter ``(lower, upper)`` bounds.
            Use ``None`` for an unbounded side.

    Returns:
        Array of initial positions with shape ``(n_walkers, p)``.

    Raises:
        ValueError: If ``theta0`` and ``fisher`` have incompatible shapes.
        ValueError: If ``sampler_bounds`` is provided and does not have
            length ``p``.
        RuntimeError: If sufficient in-bounds positions cannot be generated
            within the retry limit.
    """
    theta0 = np.asarray(theta0, float)
    fisher = np.asarray(fisher, float)
    validate_fisher_shape(theta0, fisher)

    if sampler_bounds is not None and len(sampler_bounds) != theta0.size:
        raise ValueError(
            f"sampler_bounds must have length {theta0.size}; got {len(sampler_bounds)}."
        )

    if sampler_bounds is None:
        return kernel_samples_from_fisher(
            theta0, fisher, n_samples=int(n_walkers), kernel_scale=float(init_scale), seed=seed
        )

    out: list[np.ndarray] = []
    need = int(n_walkers)
    tries = 0
    while need > 0:
        tries += 1
        if tries > 50:
            raise RuntimeError(
                "Failed to initialize emcee walkers within sampler_bounds. "
                "Try increasing init_scale or relaxing sampler_bounds."
            )

        draw = kernel_samples_from_fisher(
            theta0,
            fisher,
            n_samples=max(need, int(n_walkers)),
            kernel_scale=float(init_scale),
            seed=None if seed is None else seed + tries,
        )
        draw = apply_parameter_bounds(draw, sampler_bounds)
        # apply_parameter_bounds returns an array with shape (n_kept, p).
        # If n_kept == 0, all proposed walkers fell outside the sampler bounds,
        # so we retry with a fresh draw.
        if draw.shape[0] == 0:
            continue

        take = min(need, draw.shape[0])
        out.append(draw[:take])
        need -= take

    return np.vstack(out)


def fisher_to_cov(
    fisher: NDArray[np.floating],
    *,
    rcond: float | None = None,
) -> NDArray[np.float64]:
    """Converts a Fisher matrix to a covariance matrix using a pseudoinverse.

    Args:
        fisher: Fisher information matrix with shape ``(p, p)``.
        rcond: Cutoff ratio for small singular values in the pseudoinverse.
            If ``None``, the default from ``np.linalg.pinv`` is used.

    Returns:
        Covariance matrix with shape ``(p, p)`` given by ``pinv(fisher)``.

    Raises:
        ValueError: If ``fisher`` is not a square 2D array.
    """
    fisher = validate_square_matrix(fisher, name="fisher")
    if rcond is None:
        return np.linalg.pinv(fisher, hermitian=True)
    return np.linalg.pinv(fisher, rcond=rcond, hermitian=True)
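The module above maps directly onto the workflow its docstring describes: draw candidates from ``q``, filter them with bounds, and form importance log-weights as the target log-posterior minus ``log(q)``. Below is a minimal usage sketch of that workflow, assuming the hunk is derivkit/forecasting/sampling_utils.py (it matches the ``+388`` entry in the listing above); the quadratic ``toy_log_posterior`` is a hypothetical stand-in target for illustration and is not part of derivkit.

import numpy as np

from derivkit.forecasting.sampling_utils import (
    apply_parameter_bounds,
    init_walkers_from_fisher,
    kernel_samples_from_fisher,
    log_gaussian_kernel,
)

theta0 = np.array([1.0, 0.5])
fisher = np.array([[4.0, 0.5], [0.5, 2.0]])
bounds = [(0.0, None), (None, None)]  # first parameter must stay non-negative

# Draw candidates from the kernel q and reject out-of-bounds points.
candidates = kernel_samples_from_fisher(
    theta0, fisher, n_samples=1000, kernel_scale=2.0, seed=42
)
kept = apply_parameter_bounds(candidates, bounds)

# Importance log-weights: log target minus log q. This quadratic is a
# hypothetical stand-in for the real target log-posterior.
def toy_log_posterior(theta: np.ndarray) -> np.ndarray:
    diff = theta - theta0
    return -0.5 * np.einsum("ij,jk,ik->i", diff, fisher, diff)

log_q = log_gaussian_kernel(kept, theta0, fisher, kernel_scale=2.0)
log_w = toy_log_posterior(kept) - log_q
weights = np.exp(log_w - np.max(log_w))  # shift by the max to avoid overflow

# Walker initialization for emcee-style samplers under the same bounds.
walkers = init_walkers_from_fisher(
    theta0, fisher, n_walkers=32, init_scale=1.0, seed=7, sampler_bounds=bounds
)
assert walkers.shape == (32, 2)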
derivkit/likelihood_kit.py

@@ -0,0 +1,114 @@
"""Provides the LikelihoodKit class.

Typical usage examples
----------------------

>>> import numpy as np
>>> from derivkit.likelihood_kit import LikelihoodKit
>>>
>>> # Gaussian example
>>> data = np.linspace(-5.0, 5.0, 200)
>>> theta = np.array([0.0])
>>> cov = np.array([[1.0]])
>>> lkit = LikelihoodKit(data=data, model_parameters=theta)
>>> grid, log_pdf = lkit.gaussian(cov=cov)
>>>
>>> # Poissonian example
>>> counts = np.array([1, 2, 3, 4])
>>> mu = np.array([0.5, 1.0, 1.5, 2.0])
>>> lkit = LikelihoodKit(data=counts, model_parameters=mu)
>>> reshaped_counts, log_pmf = lkit.poissonian()
"""

from __future__ import annotations

import numpy as np
from numpy.typing import ArrayLike, NDArray

from derivkit.likelihoods.gaussian import build_gaussian_likelihood
from derivkit.likelihoods.poisson import build_poissonian_likelihood


class LikelihoodKit:
    """High-level interface for Gaussian and Poissonian likelihoods.

    The class stores ``data`` and ``model_parameters`` and provides
    methods to evaluate the corresponding likelihoods.
    """

    def __init__(
        self,
        data: ArrayLike,
        model_parameters: ArrayLike,
    ) -> None:
        """Initialises the LikelihoodKit object.

        Args:
            data: Observed data values. The expected shape depends on the
                particular likelihood. For the Gaussian likelihood, ``data``
                is 1D or 2D, where axis 0 represents different samples and
                axis 1 the values. For the Poissonian likelihood, ``data`` is
                reshaped to align with ``model_parameters``.
            model_parameters: Theoretical model values. For the Gaussian
                likelihood, this is a 1D array of parameters used as the mean
                of the multivariate normal. For the Poissonian likelihood,
                this is the expected counts (Poisson means).
        """
        self.data = np.asarray(data)
        self.model_parameters = np.asarray(model_parameters)

    def gaussian(
        self,
        cov: ArrayLike,
        *,
        return_log: bool = True,
    ) -> tuple[tuple[NDArray[np.float64], ...], NDArray[np.float64]]:
        """Evaluates a Gaussian likelihood for the stored data and parameters.

        Args:
            cov: Covariance matrix. May be a scalar, a 1D vector of diagonal
                variances, or a full 2D covariance matrix. It will be
                symmetrized and normalized internally.
            return_log: If ``True``, return the log-likelihood instead of
                the probability density function.

        Returns:
            A tuple ``(coordinate_grids, probabilities)`` where:

            * ``coordinate_grids`` is a tuple of 1D arrays giving the
              evaluation coordinates for each dimension.
            * ``probabilities`` is an array with the values of the
              multivariate Gaussian probability density (or log-density)
              evaluated on the Cartesian product of those coordinates.
        """
        return build_gaussian_likelihood(
            data=self.data,
            model_parameters=self.model_parameters,
            cov=cov,
            return_log=return_log,
        )

    def poissonian(
        self,
        *,
        return_log: bool = True,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Evaluates a Poissonian likelihood for the stored data and parameters.

        Args:
            return_log: If ``True``, return the log-likelihood instead of
                the probability mass function.

        Returns:
            A tuple ``(counts, probabilities)`` where:

            * ``counts`` is the data reshaped to align with the
              model parameters.
            * ``probabilities`` is an array of Poisson probabilities
              (or log-probabilities) computed from ``counts`` and
              ``model_parameters``.
        """
        return build_poissonian_likelihood(
            data=self.data,
            model_parameters=self.model_parameters,
            return_log=return_log,
        )
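Because ``return_log`` defaults to ``True`` in both methods, the second element of the returned tuple is a log-density unless the flag is cleared. A minimal sketch contrasting the two modes (the numbers are illustrative only):

import numpy as np

from derivkit.likelihood_kit import LikelihoodKit

# 200 evaluation points for a 1D Gaussian with mean 0 and unit variance.
grid_points = np.linspace(-5.0, 5.0, 200)
lkit = LikelihoodKit(data=grid_points, model_parameters=np.array([0.0]))

grids, log_pdf = lkit.gaussian(cov=np.array([[1.0]]))                # default: log-density
grids, pdf = lkit.gaussian(cov=np.array([[1.0]]), return_log=False)  # linear density

# The two modes agree up to exponentiation.
assert np.allclose(np.exp(log_pdf), pdf)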
derivkit/likelihoods/__init__.py

@@ -0,0 +1 @@
"""Likelihood utilities."""
derivkit/likelihoods/gaussian.py

@@ -0,0 +1,136 @@
"""Gaussian likelihood function module."""

from __future__ import annotations

import numpy as np
from numpy.typing import ArrayLike, NDArray
from scipy.stats import multivariate_normal

from derivkit.utils.linalg import normalize_covariance

__all__ = [
    "build_gaussian_likelihood",
]


def build_gaussian_likelihood(
    data: ArrayLike,
    model_parameters: ArrayLike,
    cov: ArrayLike,
    return_log: bool = True,
) -> tuple[tuple[NDArray[np.float64], ...], NDArray[np.float64]]:
    """Constructs the Gaussian likelihood function.

    Args:
        data: a 1D or 2D array representing the given data values. It is
            expected that axis 0 represents different samples of data while
            axis 1 represents the data values.
        model_parameters: a 1D array representing the theoretical values
            of the model parameters.
        cov: covariance matrix. May be a scalar, a 1D vector of diagonal
            variances, or a full 2D covariance matrix. It will be symmetrized
            and normalized internally to ensure compatibility with the data
            and model_parameters.
        return_log: when set to ``True``, return the log-likelihood instead
            of the probability density function.

    Returns:
        A tuple:

        - coordinate_grids: tuple of 1D arrays giving the evaluation
          coordinates for each dimension (one array per dimension), ordered
          consistently with the first axis of ``data``.
        - probability_density: ndarray with the values of the multivariate
          Gaussian probability density function (or log-density when
          ``return_log`` is ``True``) evaluated on the Cartesian product of
          those coordinates.

    Raises:
        ValueError: raised if

            - data is not 1D or 2D,
            - model_parameters is not 1D,
            - the number of samples in data does not match the number of
              model parameters,
            - model_parameters contain non-finite values,
            - cov cannot be normalized to a valid covariance matrix.

    Examples:
        A 1D Gaussian likelihood:

        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> from derivkit.likelihoods.gaussian import build_gaussian_likelihood
        >>> data = np.linspace(-10, 10, 100)[np.newaxis, :]
        >>> model_parameters = np.array([1.0])
        >>> cov = np.array([[2.0]])
        >>> x_grid, pdf = build_gaussian_likelihood(
        ...     data, model_parameters, cov, return_log=False)
        >>> plt.plot(x_grid[0], pdf[0])  # doctest: +SKIP

        A 2D Gaussian likelihood:

        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> data = np.asarray((np.linspace(-10, 10, 30), np.linspace(3, 6, 30)))
        >>> model_parameters = np.array([0.0, 4.0])
        >>> cov = np.array([[1.0, 0.2], [0.2, 0.3]])
        >>> # Build coordinate arrays and evaluate the probability density on
        >>> # their Cartesian product. The indexing ensures the coordinate
        >>> # order matches the order in ``data``.
        >>> grid, probability_density = build_gaussian_likelihood(
        ...     data, model_parameters, cov, return_log=False)
        >>> plt.contour(*grid, probability_density)  # doctest: +SKIP
    """
    # The data is expected to be 2D. However, 1D is allowed, since it can be
    # embedded in a 2D space.
    _data = np.array(data, dtype=float, copy=True)
    if not np.isfinite(_data).all():
        raise ValueError("data contain non-finite values.")
    if _data.ndim == 1:
        _data = _data[np.newaxis, :]
    elif _data.ndim > 2:
        raise ValueError(f"data must be a 1D or 2D array, but is a {_data.ndim}D array.")

    number_samples = _data.shape[0]
    model_parameters = np.asarray(model_parameters, dtype=float)
    if model_parameters.ndim != 1:
        raise ValueError(
            "model_parameters must be a 1D array, "
            f"but is a {model_parameters.ndim}D array."
        )
    model_parameters = model_parameters.ravel()
    if not np.isfinite(model_parameters).all():
        raise ValueError("model_parameters contain non-finite values.")

    number_model_parameters = model_parameters.size
    if number_samples != number_model_parameters:
        raise ValueError(
            "There must be as many model parameters as there are samples of data. "
            f"(n_params={number_model_parameters}, n_samples={number_samples})"
        )

    cov = np.asarray(cov, dtype=float)
    if not np.isfinite(cov).all():
        raise ValueError("cov contains non-finite values.")
    cov_dim = cov.ndim
    cov_shape = cov.shape
    is_scalar = cov_dim == 0
    is_valid_vector = cov_dim == 1 and cov_shape[0] == number_model_parameters
    is_valid_matrix = (
        cov_dim == 2
        and cov_shape[0] == cov_shape[1] == number_model_parameters
    )
    if not (is_scalar or is_valid_vector or is_valid_matrix):
        raise ValueError(
            "Input cov is not compatible with input model_parameters."
        )

    sigma = normalize_covariance(
        (cov + cov.T) / 2,
        n_parameters=number_model_parameters,
    )

    # The data are coordinate vectors, which have to be extended into a
    # coordinate grid (meshgrid). The grids are then combined to give a
    # box of coordinates (dstack), which is then sent to the PDF. The
    # indexing in meshgrid should ensure that the ordering of the grids
    # corresponds to the ordering of the original data.
    coordinate_grids = np.meshgrid(*_data, indexing="ij")
    coordinate_box = np.dstack(coordinate_grids)
    distribution = multivariate_normal(mean=model_parameters, cov=sigma)
    probabilities = (
        distribution.logpdf(coordinate_box)
        if return_log
        else distribution.pdf(coordinate_box)
    )
    return coordinate_grids, probabilities
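The meshgrid/dstack step at the end of ``build_gaussian_likelihood`` does the actual Cartesian-product evaluation. A standalone sketch of the same pattern, using only numpy and scipy (no derivkit imports), with the shapes it produces:

import numpy as np
from scipy.stats import multivariate_normal

# Two coordinate axes, mirroring the 2D docstring example above.
x = np.linspace(-10.0, 10.0, 30)
y = np.linspace(3.0, 6.0, 30)

# meshgrid with indexing="ij" keeps the axis order of the inputs;
# dstack turns the grids into a (30, 30, 2) box of coordinate pairs,
# so the PDF is evaluated on the full Cartesian product of x and y.
grids = np.meshgrid(x, y, indexing="ij")
box = np.dstack(grids)

dist = multivariate_normal(mean=[0.0, 4.0], cov=[[1.0, 0.2], [0.2, 0.3]])
density = dist.pdf(box)
assert density.shape == (30, 30)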