derivkit-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- derivkit/__init__.py +22 -0
- derivkit/calculus/__init__.py +17 -0
- derivkit/calculus/calculus_core.py +152 -0
- derivkit/calculus/gradient.py +97 -0
- derivkit/calculus/hessian.py +528 -0
- derivkit/calculus/hyper_hessian.py +296 -0
- derivkit/calculus/jacobian.py +156 -0
- derivkit/calculus_kit.py +128 -0
- derivkit/derivative_kit.py +315 -0
- derivkit/derivatives/__init__.py +6 -0
- derivkit/derivatives/adaptive/__init__.py +5 -0
- derivkit/derivatives/adaptive/adaptive_fit.py +238 -0
- derivkit/derivatives/adaptive/batch_eval.py +179 -0
- derivkit/derivatives/adaptive/diagnostics.py +325 -0
- derivkit/derivatives/adaptive/grid.py +333 -0
- derivkit/derivatives/adaptive/polyfit_utils.py +513 -0
- derivkit/derivatives/adaptive/spacing.py +66 -0
- derivkit/derivatives/adaptive/transforms.py +245 -0
- derivkit/derivatives/autodiff/__init__.py +1 -0
- derivkit/derivatives/autodiff/jax_autodiff.py +95 -0
- derivkit/derivatives/autodiff/jax_core.py +217 -0
- derivkit/derivatives/autodiff/jax_utils.py +146 -0
- derivkit/derivatives/finite/__init__.py +5 -0
- derivkit/derivatives/finite/batch_eval.py +91 -0
- derivkit/derivatives/finite/core.py +84 -0
- derivkit/derivatives/finite/extrapolators.py +511 -0
- derivkit/derivatives/finite/finite_difference.py +247 -0
- derivkit/derivatives/finite/stencil.py +206 -0
- derivkit/derivatives/fornberg.py +245 -0
- derivkit/derivatives/local_polynomial_derivative/__init__.py +1 -0
- derivkit/derivatives/local_polynomial_derivative/diagnostics.py +90 -0
- derivkit/derivatives/local_polynomial_derivative/fit.py +199 -0
- derivkit/derivatives/local_polynomial_derivative/local_poly_config.py +95 -0
- derivkit/derivatives/local_polynomial_derivative/local_polynomial_derivative.py +205 -0
- derivkit/derivatives/local_polynomial_derivative/sampling.py +72 -0
- derivkit/derivatives/tabulated_model/__init__.py +1 -0
- derivkit/derivatives/tabulated_model/one_d.py +247 -0
- derivkit/forecast_kit.py +783 -0
- derivkit/forecasting/__init__.py +1 -0
- derivkit/forecasting/dali.py +78 -0
- derivkit/forecasting/expansions.py +486 -0
- derivkit/forecasting/fisher.py +298 -0
- derivkit/forecasting/fisher_gaussian.py +171 -0
- derivkit/forecasting/fisher_xy.py +357 -0
- derivkit/forecasting/forecast_core.py +313 -0
- derivkit/forecasting/getdist_dali_samples.py +429 -0
- derivkit/forecasting/getdist_fisher_samples.py +235 -0
- derivkit/forecasting/laplace.py +259 -0
- derivkit/forecasting/priors_core.py +860 -0
- derivkit/forecasting/sampling_utils.py +388 -0
- derivkit/likelihood_kit.py +114 -0
- derivkit/likelihoods/__init__.py +1 -0
- derivkit/likelihoods/gaussian.py +136 -0
- derivkit/likelihoods/poisson.py +176 -0
- derivkit/utils/__init__.py +13 -0
- derivkit/utils/concurrency.py +213 -0
- derivkit/utils/extrapolation.py +254 -0
- derivkit/utils/linalg.py +513 -0
- derivkit/utils/logger.py +26 -0
- derivkit/utils/numerics.py +262 -0
- derivkit/utils/sandbox.py +74 -0
- derivkit/utils/types.py +15 -0
- derivkit/utils/validate.py +811 -0
- derivkit-1.0.0.dist-info/METADATA +50 -0
- derivkit-1.0.0.dist-info/RECORD +68 -0
- derivkit-1.0.0.dist-info/WHEEL +5 -0
- derivkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- derivkit-1.0.0.dist-info/top_level.txt +1 -0
derivkit/forecasting/fisher_xy.py
@@ -0,0 +1,357 @@
r"""Gaussian Fisher matrix utilities for models with uncertainty in both inputs and outputs.

This module implements the X–Y Gaussian Fisher formalism, where both the measured
inputs and outputs are noisy and may be correlated. The key idea is to account for
uncertainty in the inputs by propagating it into an effective covariance for the
outputs through a local linearization of the model. This allows standard Gaussian
Fisher matrix techniques to be applied without explicitly marginalizing over the
latent input variables.

Model and covariance structure
------------------------------

The model provides a mean prediction ``mu_xy(x, theta)`` for the observed output
``y`` as a function of inputs ``x`` and parameters ``theta``. Measurement errors
on ``x`` and ``y`` are described by a joint Gaussian covariance

.. math::

    C =
    \begin{pmatrix}
        C_{xx} & C_{xy} \\
        C_{xy}^{\mathsf{T}} & C_{yy}
    \end{pmatrix}.

Linearizing the model mean in the inputs around the measured values ``x_obs``,

.. math::

    \mu_{xy}(x, \theta) \approx \mu_{xy}(x_{\mathrm{obs}}, \theta) + T (x - x_{\mathrm{obs}}),

with

.. math::

    T = \left.\frac{\partial \mu_{xy}}{\partial x}\right|_{(x_{\mathrm{obs}}, \theta)},

yields an effective output covariance

.. math::

    R = C_{yy}
        - C_{xy}^{\mathsf{T}} T^{\mathsf{T}}
        - T C_{xy}
        + T C_{xx} T^{\mathsf{T}}.

This effective covariance replaces ``C_{yy}`` in the Gaussian likelihoods and Fisher
matrix. The covariance blocks ``Cxx``, ``Cxy``, and ``Cyy`` are treated as fixed;
parameter dependence enters only through the local sensitivity matrix ``T``.

This formalism follows the generalized Fisher matrix treatment of
Heavens et al. (2014), https://arxiv.org/abs/1404.2854.
"""

from __future__ import annotations

from functools import partial
from typing import Any, Callable

import numpy as np
from numpy.typing import NDArray

from derivkit.calculus_kit import CalculusKit
from derivkit.forecasting.fisher_gaussian import build_gaussian_fisher_matrix
from derivkit.utils.linalg import as_1d_data_vector, split_xy_covariance

MuXY = Callable[[NDArray[np.float64], NDArray[np.float64]], NDArray[np.float64] | float]

__all__ = [
    "mu_xy_given_theta",
    "mu_xy_given_x0",
    "build_mu_theta_from_mu_xy",
    "build_t_matrix",
    "build_effective_covariance_r",
    "effective_covariance_r_theta",
    "build_xy_gaussian_fisher_matrix",
]


def mu_xy_given_theta(
    x: NDArray[np.float64],
    *,
    theta: NDArray[np.float64],
    mu_xy: MuXY,
) -> NDArray[np.float64]:
    """Evaluates the model predicted mean as a function of ``x`` at fixed parameters.

    The input ``mu_xy`` is a callable that returns the model's mean prediction for the
    observed quantity ``y`` given input values ``x`` and parameters ``theta``. This
    wrapper holds ``theta`` fixed, so the resulting function depends only on ``x``.
    This form is used when varying the inputs while keeping the parameter point
    unchanged, such as when computing sensitivities with respect to ``x``.

    Args:
        x: Input values at which to evaluate the model.
        theta: Parameter values to hold fixed.
        mu_xy: Function that predicts the mean of ``y`` given ``x`` and ``theta``.

    Returns:
        The model's mean prediction for ``y`` at ``(x, theta)``, returned as a single
        1D data vector.
    """
    x = np.atleast_1d(np.asarray(x, dtype=np.float64))
    theta = np.atleast_1d(np.asarray(theta, dtype=np.float64))
    return as_1d_data_vector(mu_xy(x, theta))


def mu_xy_given_x0(
    theta: NDArray[np.float64],
    *,
    x0: NDArray[np.float64],
    mu_xy: MuXY,
) -> NDArray[np.float64]:
    """Evaluates the model predicted mean as a function of ``theta`` at fixed inputs.

    The input ``mu_xy`` is a callable that returns the model's mean prediction for the
    observed quantity ``y`` given input values ``x`` and parameters ``theta``. This
    wrapper holds the inputs fixed at a chosen reference point ``x0``, so the resulting
    function depends only on ``theta``. This form is used when varying the parameters
    while treating the inputs as fixed.

    Args:
        theta: Parameter values at which to evaluate the model.
        x0: Input values to hold fixed.
        mu_xy: Function that predicts the mean of ``y`` given ``x`` and ``theta``.

    Returns:
        The model's mean prediction for ``y`` at ``(x0, theta)``, returned as a single
        1D data vector.
    """
    x0 = np.atleast_1d(np.asarray(x0, dtype=np.float64))
    theta = np.atleast_1d(np.asarray(theta, dtype=np.float64))
    return as_1d_data_vector(mu_xy(x0, theta))


def build_mu_theta_from_mu_xy(
    mu_xy: MuXY,
    *,
    x0: NDArray[np.float64],
) -> Callable[[NDArray[np.float64]], NDArray[np.float64]]:
    """Constructs a mean function that depends only on the model parameters.

    The input ``mu_xy`` predicts the model mean for the observed quantity given
    input values and parameters. This helper fixes the input values at a chosen
    reference point ``x0`` and returns a callable that depends only on the
    parameters. The resulting function represents the model mean evaluated at
    fixed inputs and is used when building parameter derivatives or Fisher
    matrices.

    Args:
        mu_xy: Function that predicts the mean of the observed quantity given
            input values and parameters.
        x0: Input values at which the model mean is evaluated.

    Returns:
        A callable that evaluates the model mean as a function of the parameters
        with the inputs held fixed.
    """
    x0 = np.atleast_1d(np.asarray(x0, dtype=np.float64))
    return partial(mu_xy_given_x0, x0=x0, mu_xy=mu_xy)


def build_t_matrix(
    mu_xy: MuXY,
    *,
    x0: NDArray[np.float64],
    theta: NDArray[np.float64],
    method: str | None = None,
    n_workers: int = 1,
    **dk_kwargs: Any,
) -> NDArray[np.float64]:
    """Computes the sensitivity of the model mean to changes in the inputs.

    The returned matrix describes how the model mean for the observed quantity changes
    when the input values are perturbed, evaluated at a reference input point ``x0``
    and parameter point ``theta``. In the X–Y Gaussian formulation, this sensitivity
    is used to propagate input uncertainty into an effective covariance for the
    observed quantity.

    Args:
        mu_xy: Function that predicts the mean of the observed quantity given input
            values and parameters.
        x0: Reference input values at which the sensitivity is evaluated.
        theta: Parameter values at which the sensitivity is evaluated.
        method: Optional derivative method name used by the derivative backend.
            This option is forwarded through ``CalculusKit`` to the underlying
            derivative engine (``DerivativeKit``). If ``None``, the backend
            default is used.
        n_workers: Number of workers used for derivative evaluations.
        **dk_kwargs: Additional keyword arguments forwarded to the derivative engine.

    Returns:
        A matrix of sensitivities evaluated at ``(x0, theta)``, with one row per output
        component and one column per input component.
    """
    x0 = np.atleast_1d(np.asarray(x0, dtype=np.float64))
    theta = np.atleast_1d(np.asarray(theta, dtype=np.float64))

    mu_of_x = partial(mu_xy_given_theta, theta=theta, mu_xy=mu_xy)

    ckit = CalculusKit(mu_of_x, x0)
    jac = np.asarray(
        ckit.jacobian(method=method, n_workers=n_workers, **dk_kwargs),
        dtype=np.float64,
    )

    if jac.ndim == 1:
        jac = jac[None, :]
    return jac


def build_effective_covariance_r(
    *,
    cov: NDArray[np.float64],
    x0: NDArray[np.float64],
    t: NDArray[np.float64],
) -> NDArray[np.float64]:
    """Computes an effective output covariance that includes input uncertainty.

    The X–Y Gaussian formulation allows both the inputs and outputs to be noisy, with
    possible correlations between input and output errors. This function combines the
    input covariance, output covariance, and cross-covariance with a local sensitivity
    matrix ``t`` (describing how the model mean changes with the inputs) to produce an
    effective covariance for the outputs. The result is the covariance used in the
    Gaussian likelihoods and Fisher matrix after input uncertainty has been propagated
    to the output space.

    Args:
        cov: Full covariance matrix for the stacked vector ``[x, y]``.
        x0: Input values at which the model mean is evaluated.
        t: Sensitivity matrix of the model mean with respect to the inputs, evaluated
            at a chosen reference point.

    Returns:
        The effective covariance matrix for the output measurements.
    """
    cov = np.asarray(cov, dtype=np.float64)
    x0 = np.atleast_1d(np.asarray(x0, dtype=np.float64))

    nx = int(x0.size)
    cxx, cxy, cyy = split_xy_covariance(cov, nx=nx)

    ny = cyy.shape[0]
    t = np.asarray(t, dtype=np.float64)
    if t.shape != (ny, nx):
        raise ValueError(f"t must have shape ({ny}, {nx}); got {t.shape}.")

    return cyy - (cxy.T @ t.T) - (t @ cxy) + (t @ cxx @ t.T)


def effective_covariance_r_theta(
    theta: NDArray[np.float64],
    *,
    mu_xy: MuXY,
    x0: NDArray[np.float64],
    cov: NDArray[np.float64],
    method: str | None,
    n_workers: int,
    dk_kwargs: dict[str, Any],
) -> NDArray[np.float64]:
    """Evaluates the effective output covariance at a given parameter point.

    The block covariances for the input and output measurements are treated as fixed.
    The effective output covariance depends on the parameters through the local
    sensitivity of the model mean to the inputs, evaluated at the reference inputs
    ``x0``. This function recomputes that sensitivity at the supplied ``theta`` and
    returns the corresponding effective covariance.

    Args:
        theta: Parameter values at which the effective covariance is evaluated.
        mu_xy: Function that predicts the mean of the observed quantity given input
            values and parameters.
        x0: Reference input values used for the local sensitivity evaluation.
        cov: Full covariance matrix for the stacked vector ``[x, y]``.
        method: Optional derivative method name passed to the derivative engine.
        n_workers: Number of workers used for derivative evaluations.
        dk_kwargs: Additional keyword arguments forwarded to the derivative engine.

    Returns:
        The effective covariance matrix for the output measurements at ``theta``.
    """
    x0 = np.atleast_1d(np.asarray(x0, dtype=np.float64))
    theta = np.atleast_1d(np.asarray(theta, dtype=np.float64))

    t_matrix = build_t_matrix(
        mu_xy,
        x0=x0,
        theta=theta,
        method=method,
        n_workers=n_workers,
        **dk_kwargs,
    )
    return build_effective_covariance_r(cov=cov, x0=x0, t=t_matrix)


def build_xy_gaussian_fisher_matrix(
    *,
    theta0: NDArray[np.float64],
    x0: NDArray[np.float64],
    mu_xy: MuXY,
    cov: NDArray[np.float64],
    method: str | None = None,
    n_workers: int = 1,
    rcond: float = 1e-12,
    symmetrize_dcov: bool = True,
    **dk_kwargs: Any,
) -> NDArray[np.float64]:
    """Computes a Gaussian Fisher matrix when both inputs and outputs are noisy.

    This function supports the X–Y Gaussian case, where measurement uncertainty is
    present in the inputs and in the outputs, and the two may be correlated. Input
    uncertainty is incorporated by forming an effective covariance for the output
    measurements using a local sensitivity of the model mean to the inputs evaluated
    at the reference inputs ``x0``. The Fisher matrix is then constructed at the
    parameter point ``theta0`` using the model mean evaluated at ``x0`` and the
    effective output covariance.

    Args:
        theta0: Parameter values at which the Fisher matrix is evaluated.
        x0: Reference input values used for the local sensitivity evaluation.
        mu_xy: Function that predicts the mean of the observed quantity given input
            values and parameters.
        cov: Full covariance matrix for the stacked vector ``[x, y]``.
        method: Optional derivative method name passed to the derivative engine.
        n_workers: Number of workers used for derivative evaluations.
        rcond: Cutoff used when solving linear systems involving the covariance.
        symmetrize_dcov: Whether to symmetrize numerical covariance derivatives.
        **dk_kwargs: Additional keyword arguments forwarded to the derivative engine.

    Returns:
        The Fisher information matrix evaluated at ``theta0``.
    """
    theta0 = np.atleast_1d(np.asarray(theta0, dtype=np.float64))
    x0 = np.atleast_1d(np.asarray(x0, dtype=np.float64))
    cov = np.asarray(cov, dtype=np.float64)

    mu_theta = build_mu_theta_from_mu_xy(mu_xy, x0=x0)

    r_fn = partial(
        effective_covariance_r_theta,
        mu_xy=mu_xy,
        x0=x0,
        cov=cov,
        method=method,
        n_workers=n_workers,
        dk_kwargs=dict(dk_kwargs),
    )

    return build_gaussian_fisher_matrix(
        theta0=theta0,
        cov=r_fn,
        function=mu_theta,
        method=method,
        n_workers=n_workers,
        rcond=rcond,
        symmetrize_dcov=symmetrize_dcov,
        **dk_kwargs,
    )
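
For intuition, the effective covariance ``R`` above is exactly the covariance of the
residual ``y - T x`` under the joint Gaussian. The following is a minimal,
self-contained numpy sketch of that identity for a toy linear model; it does not
import derivkit, and the dimensions and numbers are purely illustrative.

# --- usage sketch (illustrative, not part of the package) ---
import numpy as np

# Toy linear model mu_xy(x, theta) = theta[0] + theta[1] * x, so the
# sensitivity matrix is T = theta[1] * I, independent of x0.
nx = ny = 3
theta = np.array([0.5, 2.0])
t = theta[1] * np.eye(ny, nx)

# Random symmetric positive-definite joint covariance for the stacked [x, y].
rng = np.random.default_rng(0)
a = rng.normal(size=(nx + ny, nx + ny))
cov = a @ a.T + (nx + ny) * np.eye(nx + ny)
cxx, cxy, cyy = cov[:nx, :nx], cov[:nx, nx:], cov[nx:, nx:]

# Effective covariance from the docstring formula ...
r = cyy - cxy.T @ t.T - t @ cxy + t @ cxx @ t.T

# ... equals B C B^T with B = [-T, I], i.e. Cov(y - T x): the input noise
# has been propagated into the output space.
b = np.hstack([-t, np.eye(ny)])
assert np.allclose(r, b @ cov @ b.T)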
derivkit/forecasting/forecast_core.py
@@ -0,0 +1,313 @@
"""Core utilities for likelihood-based forecasts.

This module provides functional helpers to

- compute first-, second-, and third-order derivatives of a model with
  respect to its parameters, and
- build Fisher, doublet-DALI, and triplet-DALI forecast tensors from those
  derivatives and a covariance matrix.

These functions are the low-level building blocks used by higher-level
forecasting interfaces in DerivKit. For details on the DALI expansion,
see e.g. https://doi.org/10.1103/PhysRevD.107.103506.
"""

from typing import Any, Callable

import numpy as np
from numpy.typing import NDArray

from derivkit.calculus_kit import CalculusKit
from derivkit.utils.concurrency import normalize_workers
from derivkit.utils.linalg import invert_covariance
from derivkit.utils.types import ArrayLike1D, ArrayLike2D
from derivkit.utils.validate import validate_covariance_matrix_shape

__all__ = [
    "SUPPORTED_FORECAST_ORDERS",
    "get_forecast_tensors",
]


#: The supported orders of the DALI expansion.
#:
#: A value of 1 corresponds to the Fisher matrix.
#: A value of 2 corresponds to the DALI doublet.
#: A value of 3 corresponds to the DALI triplet.
SUPPORTED_FORECAST_ORDERS = (1, 2, 3)

SUPPORTED_DERIVATIVE_ORDERS = (1, 2, 3)


def get_forecast_tensors(
    function: Callable[[ArrayLike1D], float | NDArray[np.floating]],
    theta0: ArrayLike1D,
    cov: ArrayLike2D,
    *,
    forecast_order: int = 1,
    method: str | None = None,
    n_workers: int = 1,
    **dk_kwargs: Any,
) -> dict[int, tuple[NDArray[np.float64], ...]]:
    """Returns a set of tensors according to the requested order of the forecast.

    Args:
        function: The scalar or vector-valued function to differentiate. It
            should accept a list or array of parameter values as input and
            return either a scalar or a :class:`np.ndarray` of observable
            values.
        theta0: The point at which the derivatives are evaluated. A 1D array
            or list of parameter values matching the expected input of the
            function.
        cov: The covariance matrix of the observables. Should be a square
            matrix with shape ``(n_observables, n_observables)``, where
            ``n_observables`` is the number of observables returned by the
            function.
        forecast_order: The requested order of the forecast. Currently
            supported values and their meaning are given in
            :data:`SUPPORTED_FORECAST_ORDERS`.
        method: Method name or alias (e.g., ``"adaptive"``, ``"finite"``).
            If ``None``, the :class:`derivkit.derivative_kit.DerivativeKit`
            default (``"adaptive"``) is used.
        n_workers: Number of workers for per-parameter parallelization/threads.
            Default ``1`` (serial). Inner batch evaluation is kept serial to
            avoid nested pools.
        **dk_kwargs: Additional keyword arguments passed to
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.

    Returns:
        A dict mapping ``order -> tensors`` for all ``order = 1..forecast_order``.

        The tensors are grouped by the *forecast order at which they first appear*:

        - order 1: ``(F,)``
        - order 2: ``(D_{(2,1)}, D_{(2,2)})``
        - order 3: ``(T_{(3,1)}, T_{(3,2)}, T_{(3,3)})``

        Here ``D_{(k,l)}`` and ``T_{(k,l)}`` denote tensors obtained by contracting
        the ``k``-th order derivative with the ``l``-th order derivative via the
        inverse covariance.

        Each tensor axis has length ``p = len(theta0)``. Shapes are:

        - ``F``: ``(p, p)``
        - ``D_{(2,1)}``: ``(p, p, p)``
        - ``D_{(2,2)}``: ``(p, p, p, p)``
        - ``T_{(3,1)}``: ``(p, p, p, p)``
        - ``T_{(3,2)}``: ``(p, p, p, p, p)``
        - ``T_{(3,3)}``: ``(p, p, p, p, p, p)``

    Raises:
        TypeError: If ``forecast_order`` cannot be converted to an ``int``.
        ValueError: If ``forecast_order`` is not in :data:`SUPPORTED_FORECAST_ORDERS`.

    Warns:
        RuntimeWarning: If ``cov`` is not symmetric (proceeds as-is, no
            symmetrization), is ill-conditioned (large condition number), or
            inversion falls back to the pseudoinverse.
    """
    try:
        forecast_order = int(forecast_order)
    except Exception as e:
        raise TypeError(
            f"forecast_order must be an int; got {type(forecast_order)}."
        ) from e

    if forecast_order not in SUPPORTED_FORECAST_ORDERS:
        raise ValueError(
            f"forecast_order={forecast_order} is not supported. "
            f"Supported values: {SUPPORTED_FORECAST_ORDERS}."
        )

    theta0_arr = np.asarray(theta0, dtype=np.float64).reshape(-1)
    if theta0_arr.size == 0:
        raise ValueError("theta0 must be a non-empty 1D array.")

    cov_arr = validate_covariance_matrix_shape(cov)
    n_observables = cov_arr.shape[0]

    y0 = np.asarray(function(theta0_arr), dtype=float)
    y0_flat = y0.reshape(-1)

    if y0_flat.size != n_observables:
        raise ValueError(
            f"Expected {n_observables} observables from model "
            f"(from cov {cov_arr.shape}), "
            f"but got {y0_flat.size} (output shape {y0.shape})."
        )

    invcov = invert_covariance(cov_arr, warn_prefix="get_forecast_tensors")

    forecast_tensors: dict[int, tuple[NDArray[np.float64], ...]] = {}
    derivatives: dict[int, NDArray[np.float64]] = {}

    contractions = {
        1: {1: "ia,ij,jb->ab"},
        2: {1: "iab,ij,jc->abc",
            2: "iab,ij,jcd->abcd"},
        3: {1: "iabc,ij,jd->abcd",
            2: "iabc,ij,jde->abcde",
            3: "iabc,ij,jdef->abcdef"},
    }

    for order1 in range(1, 1 + forecast_order):
        derivatives[order1] = _get_derivatives(
            function,
            theta0_arr,
            cov_arr,
            order=order1,
            n_workers=n_workers,
            method=method,
            **dk_kwargs,
        )

        tensors_at_order: list[NDArray[np.float64]] = []
        for order2 in contractions[order1]:
            tensors_at_order.append(
                np.einsum(
                    contractions[order1][order2],
                    derivatives[order1],
                    invcov,
                    derivatives[order2],
                ).astype(np.float64, copy=False)
            )

        forecast_tensors[order1] = tuple(tensors_at_order)

    expected_keys = set(range(1, forecast_order + 1))
    if set(forecast_tensors.keys()) != expected_keys:
        raise RuntimeError(
            f"internal error: forecast_tensors keys {sorted(forecast_tensors.keys())} "
            f"!= expected {sorted(expected_keys)}."
        )

    return forecast_tensors


def _get_derivatives(
    function: Callable[[ArrayLike1D], float | NDArray[np.floating]],
    theta0: ArrayLike1D,
    cov: ArrayLike2D,
    *,
    order: int,
    method: str | None = None,
    n_workers: int = 1,
    **dk_kwargs: Any,
) -> NDArray[np.float64]:
    """Returns derivatives of the observables of the requested order.

    Args:
        function: The scalar or vector-valued function to differentiate. It
            should accept a list or array of parameter values as input and
            return either a scalar or a :class:`np.ndarray` of observable
            values.
        theta0: The point at which the derivatives are evaluated. A 1D array
            or list of parameter values matching the expected input of the
            function.
        cov: The covariance matrix of the observables. Should be a square
            matrix with shape ``(n_observables, n_observables)``, where
            ``n_observables`` is the number of observables returned by the
            function.
        order: The requested order of the derivatives. Currently supported
            values are given in :data:`SUPPORTED_DERIVATIVE_ORDERS`.
        method: Method name or alias (e.g., ``"adaptive"``, ``"finite"``). If
            ``None``, the DerivativeKit default (``"adaptive"``) is used.
        n_workers: Number of workers for per-parameter parallelization
            (threads). Default ``1`` (serial).
        **dk_kwargs: Additional keyword arguments passed to
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.

    Returns:
        Array of derivative values. For ``order == 1``, the shape is
        ``(n_observables, n_parameters)`` (first-order derivatives). For
        ``order == 2``, the shape is
        ``(n_observables, n_parameters, n_parameters)`` (second-order
        derivatives). For ``order == 3``, the shape is
        ``(n_observables, n_parameters, n_parameters, n_parameters)``
        (third-order derivatives).

    Raises:
        ValueError: If ``order`` is not in :data:`SUPPORTED_DERIVATIVE_ORDERS`,
            or if the computed derivative array has an unexpected shape.
        TypeError: If the model output is not a scalar or a 1D vector.
    """
    if order not in SUPPORTED_DERIVATIVE_ORDERS:
        raise ValueError(
            f"Requested derivative order={order} is not supported. "
            f"Supported values: {SUPPORTED_DERIVATIVE_ORDERS}."
        )

    theta0_arr = np.atleast_1d(theta0)
    cov_arr = np.asarray(cov, dtype=float)

    n_parameters = theta0_arr.shape[0]
    n_observables = cov_arr.shape[0]

    n_workers = normalize_workers(n_workers)

    def _vectorize_model_output(theta: ArrayLike1D) -> NDArray[np.float64]:
        """Returns model output as a 1D float64 vector."""
        y = np.asarray(function(theta), dtype=np.float64)
        if y.ndim > 1:
            raise TypeError(
                "model must return a scalar or 1D vector of observables; "
                f"got shape {y.shape}."
            )
        return np.atleast_1d(y)

    ckit = CalculusKit(_vectorize_model_output, theta0_arr)

    if order == 1:
        j_raw = np.asarray(
            ckit.jacobian(
                method=method,
                n_workers=n_workers,  # allow outer parallelism across params
                **dk_kwargs,
            ),
            dtype=float,
        )
        if j_raw.shape != (n_observables, n_parameters):
            raise ValueError(
                f"jacobian returned unexpected shape {j_raw.shape}; "
                f"expected ({n_observables},{n_parameters})."
            )
        return j_raw

    elif order == 2:
        # Build the Hessian tensor once; the expected shape is
        # (n_observables, n_parameters, n_parameters), matching the einsum
        # contractions used in get_forecast_tensors.
        h_raw = np.asarray(
            ckit.hessian(
                method=method,
                n_workers=n_workers,  # allow outer parallelism across params
                **dk_kwargs,
            ),
            dtype=float,
        )
        if h_raw.shape != (n_observables, n_parameters, n_parameters):
            raise ValueError(
                f"hessian returned unexpected shape {h_raw.shape}; "
                f"expected ({n_observables},{n_parameters},{n_parameters})."
            )
        return h_raw

    elif order == 3:
        hh_raw = np.asarray(
            ckit.hyper_hessian(
                method=method,
                n_workers=n_workers,
                **dk_kwargs,
            ),
            dtype=float,
        )
        if hh_raw.shape != (n_observables, n_parameters, n_parameters, n_parameters):
            raise ValueError(
                f"hyper_hessian returned unexpected shape {hh_raw.shape}; "
                f"expected ({n_observables},{n_parameters},{n_parameters},{n_parameters})."
            )
        return hh_raw

    else:
        raise ValueError(f"Unsupported derivative order: {order}.")