derivkit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- derivkit/__init__.py +22 -0
- derivkit/calculus/__init__.py +17 -0
- derivkit/calculus/calculus_core.py +152 -0
- derivkit/calculus/gradient.py +97 -0
- derivkit/calculus/hessian.py +528 -0
- derivkit/calculus/hyper_hessian.py +296 -0
- derivkit/calculus/jacobian.py +156 -0
- derivkit/calculus_kit.py +128 -0
- derivkit/derivative_kit.py +315 -0
- derivkit/derivatives/__init__.py +6 -0
- derivkit/derivatives/adaptive/__init__.py +5 -0
- derivkit/derivatives/adaptive/adaptive_fit.py +238 -0
- derivkit/derivatives/adaptive/batch_eval.py +179 -0
- derivkit/derivatives/adaptive/diagnostics.py +325 -0
- derivkit/derivatives/adaptive/grid.py +333 -0
- derivkit/derivatives/adaptive/polyfit_utils.py +513 -0
- derivkit/derivatives/adaptive/spacing.py +66 -0
- derivkit/derivatives/adaptive/transforms.py +245 -0
- derivkit/derivatives/autodiff/__init__.py +1 -0
- derivkit/derivatives/autodiff/jax_autodiff.py +95 -0
- derivkit/derivatives/autodiff/jax_core.py +217 -0
- derivkit/derivatives/autodiff/jax_utils.py +146 -0
- derivkit/derivatives/finite/__init__.py +5 -0
- derivkit/derivatives/finite/batch_eval.py +91 -0
- derivkit/derivatives/finite/core.py +84 -0
- derivkit/derivatives/finite/extrapolators.py +511 -0
- derivkit/derivatives/finite/finite_difference.py +247 -0
- derivkit/derivatives/finite/stencil.py +206 -0
- derivkit/derivatives/fornberg.py +245 -0
- derivkit/derivatives/local_polynomial_derivative/__init__.py +1 -0
- derivkit/derivatives/local_polynomial_derivative/diagnostics.py +90 -0
- derivkit/derivatives/local_polynomial_derivative/fit.py +199 -0
- derivkit/derivatives/local_polynomial_derivative/local_poly_config.py +95 -0
- derivkit/derivatives/local_polynomial_derivative/local_polynomial_derivative.py +205 -0
- derivkit/derivatives/local_polynomial_derivative/sampling.py +72 -0
- derivkit/derivatives/tabulated_model/__init__.py +1 -0
- derivkit/derivatives/tabulated_model/one_d.py +247 -0
- derivkit/forecast_kit.py +783 -0
- derivkit/forecasting/__init__.py +1 -0
- derivkit/forecasting/dali.py +78 -0
- derivkit/forecasting/expansions.py +486 -0
- derivkit/forecasting/fisher.py +298 -0
- derivkit/forecasting/fisher_gaussian.py +171 -0
- derivkit/forecasting/fisher_xy.py +357 -0
- derivkit/forecasting/forecast_core.py +313 -0
- derivkit/forecasting/getdist_dali_samples.py +429 -0
- derivkit/forecasting/getdist_fisher_samples.py +235 -0
- derivkit/forecasting/laplace.py +259 -0
- derivkit/forecasting/priors_core.py +860 -0
- derivkit/forecasting/sampling_utils.py +388 -0
- derivkit/likelihood_kit.py +114 -0
- derivkit/likelihoods/__init__.py +1 -0
- derivkit/likelihoods/gaussian.py +136 -0
- derivkit/likelihoods/poisson.py +176 -0
- derivkit/utils/__init__.py +13 -0
- derivkit/utils/concurrency.py +213 -0
- derivkit/utils/extrapolation.py +254 -0
- derivkit/utils/linalg.py +513 -0
- derivkit/utils/logger.py +26 -0
- derivkit/utils/numerics.py +262 -0
- derivkit/utils/sandbox.py +74 -0
- derivkit/utils/types.py +15 -0
- derivkit/utils/validate.py +811 -0
- derivkit-1.0.0.dist-info/METADATA +50 -0
- derivkit-1.0.0.dist-info/RECORD +68 -0
- derivkit-1.0.0.dist-info/WHEEL +5 -0
- derivkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- derivkit-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
"""Contains functions used in constructing the Hessian of a scalar-valued function."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from functools import partial
|
|
5
|
+
from typing import Any, Tuple
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from numpy.typing import ArrayLike, NDArray
|
|
9
|
+
|
|
10
|
+
from derivkit.derivative_kit import DerivativeKit
|
|
11
|
+
from derivkit.utils.concurrency import (
|
|
12
|
+
parallel_execute,
|
|
13
|
+
resolve_inner_from_outer,
|
|
14
|
+
)
|
|
15
|
+
from derivkit.utils.sandbox import get_partial_function
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"build_hessian",
|
|
19
|
+
"build_hessian_diag",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def build_hessian(
    function: Callable[[ArrayLike], float | np.ndarray],
    theta0: np.ndarray,
    method: str | None = None,
    n_workers: int = 1,
    **dk_kwargs: Any,
) -> NDArray[np.floating]:
    """Compute the full Hessian of ``function`` at ``theta0``.

    Public entry point that delegates to ``_build_hessian_internal`` with
    ``diag=False``.

    Args:
        function: The function to be differentiated.
        theta0: The parameter vector at which the Hessian is evaluated.
        method: Method name or alias (e.g., ``"adaptive"``, ``"finite"``).
            If ``None``, the :class:`derivkit.derivative_kit.DerivativeKit`
            default (``"adaptive"``) is used.
        n_workers: Parallel tasks across output components / Hessian entries.
        **dk_kwargs: Extra options forwarded to
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.

    Returns:
        The full Hessian, with shape:

        - (p, p) if ``function(theta0)`` is scalar, with ``p`` the number of parameters.
        - (``*out_shape``, p, p) if ``function(theta0)`` has shape ``out_shape``.

        The output shape is fixed; use ``build_hessian_diag()`` if only the
        diagonal is needed.

    Raises:
        FloatingPointError: If non-finite values are encountered.
        ValueError: If ``theta0`` is an empty array.
        TypeError: If a single output component (flattened scalar subpath)
            does not return a scalar.
    """
    full_hessian = _build_hessian_internal(
        function,
        theta0,
        method=method,
        n_workers=n_workers,
        diag=False,
        **dk_kwargs,
    )
    return full_hessian
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def build_hessian_diag(
    function: Callable[[ArrayLike], float | np.ndarray],
    theta0: np.ndarray,
    method: str | None = None,
    n_workers: int = 1,
    **dk_kwargs: Any,
) -> np.ndarray:
    """Compute only the diagonal of the Hessian of ``function`` at ``theta0``.

    Public entry point that delegates to ``_build_hessian_internal`` with
    ``diag=True``.

    Args:
        function: The function to be differentiated.
        theta0: The parameter vector at which the Hessian is evaluated.
        method: Method name or alias (e.g., ``"adaptive"``, ``"finite"``).
            If ``None``, the :class:`derivkit.derivative_kit.DerivativeKit`
            default (``"adaptive"``) is used.
        n_workers: Parallel tasks across output components / Hessian entries.
        **dk_kwargs: Additional keyword arguments passed to
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.
            You may optionally pass ``inner_workers=<int>`` here to override
            the inner policy.

    Returns:
        The diagonal entries of the Hessian, with shape:

        - (p,) if ``function(theta0)`` is scalar.
        - (``*out_shape``, p) if ``function(theta0)`` has shape ``out_shape``.

        This reduction in rank is intentional to avoid computing or storing
        off-diagonal terms.

    Raises:
        FloatingPointError: If non-finite values are encountered.
        ValueError: If ``theta0`` is an empty array.
        TypeError: If evaluating a single output component does not return a scalar.
    """
    diagonal = _build_hessian_internal(
        function,
        theta0,
        method=method,
        n_workers=n_workers,
        diag=True,
        **dk_kwargs,
    )
    return diagonal
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def gauss_newton_hessian(*args, **kwargs):
    """Placeholder for a Gauss-Newton Hessian computation function.

    The function is not implemented. Any positional or keyword arguments are
    accepted and ignored.

    Raises:
        NotImplementedError: Always.
    """
    # Carry a message so the failure is self-explanatory in tracebacks and logs.
    raise NotImplementedError(
        "gauss_newton_hessian() is a placeholder and is not implemented yet."
    )
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _compute_component_hessian(
    idx: int,
    theta: NDArray[np.floating],
    method: str | None,
    inner_workers: int | None,
    return_diag: bool,
    dk_kwargs: dict,
    function: Callable[[ArrayLike], float | np.ndarray],
) -> NDArray[np.floating]:
    """Compute the Hessian (or its diagonal) for one output component.

    For a tensor-valued ``function``, the output is raveled and the scalar at
    flat index ``idx`` is selected; that scalar is then differentiated with
    respect to every parameter in ``theta``.

    Args:
        idx: Flat index into ``function(theta)`` after raveling.
        theta: Parameter vector where derivatives are evaluated.
        method: Derivative method name or alias
            (e.g., ``"adaptive"``, ``"finite"``).
        inner_workers: Optional parallelism hint for the differentiation engine.
        return_diag: If True, return only the diagonal entries.
        dk_kwargs: Extra options forwarded to
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.
        function: Original function to differentiate.

    Returns:
        (p, p) array for full Hessian or (p,) array for diagonal only,
        where ``p = theta.size``.
    """
    # A functools.partial over a module-level function (rather than a closure)
    # is used to bind the component selector.
    scalar_component = partial(
        _component_scalar_eval, function=function, idx=int(idx)
    )

    builder = (
        _build_hessian_scalar_diag if return_diag else _build_hessian_scalar_full
    )
    # The outer worker count is fixed to 1 here; presumably the caller already
    # parallelizes across output components -- confirm against parallel_execute.
    return builder(scalar_component, theta, method, 1, inner_workers, **dk_kwargs)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _component_scalar_eval(
|
|
143
|
+
theta_vec: NDArray[np.floating],
|
|
144
|
+
*,
|
|
145
|
+
function: Callable[[ArrayLike], float | np.ndarray],
|
|
146
|
+
idx: int,
|
|
147
|
+
) -> float:
|
|
148
|
+
"""Returns a scalar from a function output.
|
|
149
|
+
|
|
150
|
+
The scalar can be the function output itself if the function is scalar-valued,
|
|
151
|
+
or a single component of the function output if the function is tensor-valued.
|
|
152
|
+
The function output is flattened before returning the component.
|
|
153
|
+
|
|
154
|
+
Args:
|
|
155
|
+
theta_vec: The parameter vector at which the function is evaluated.
|
|
156
|
+
function: The original function to be differentiated.
|
|
157
|
+
idx: The index of the flattened output component to extract.
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
The value of the specified component.
|
|
161
|
+
"""
|
|
162
|
+
val = np.asarray(function(theta_vec))
|
|
163
|
+
return float(val.ravel()[idx])
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _build_hessian_scalar_full(
    function: Callable[[ArrayLike], float | np.ndarray],
    theta: np.ndarray,
    method: str | None,
    outer_workers: int,
    inner_workers: int | None,
    **dk_kwargs: Any,
) -> np.ndarray:
    """Assemble the full (p, p) Hessian of a scalar-valued function.

    Args:
        function: The function to be differentiated.
        theta: The parameter vector at which the Hessian is evaluated.
        method: Method name or alias (e.g., ``"adaptive"``, ``"finite"``).
        outer_workers: Number of outer parallel workers for Hessian entries.
        inner_workers: Optional inner parallelism for the differentiation engine.
        **dk_kwargs: Additional keyword arguments for
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.

    Returns:
        A 2D array representing the Hessian.

    Raises:
        FloatingPointError: If non-finite values are encountered.
        TypeError: If ``function`` does not return a scalar value.
    """
    n_params = int(theta.size)

    # Only unique entries are computed: the diagonal first, then the strict
    # upper triangle in row-major order. The lower triangle is filled by
    # mirroring, which roughly halves the number of derivative evaluations.
    index_pairs: list[Tuple[int, int]] = [(d, d) for d in range(n_params)]
    index_pairs.extend(
        (row, col)
        for row in range(n_params)
        for col in range(row + 1, n_params)
    )

    tasks: list[Tuple[Any, ...]] = [
        (function, theta, row, col, method, inner_workers, dk_kwargs)
        for row, col in index_pairs
    ]

    vals = parallel_execute(
        _hessian_component_worker,
        tasks,
        outer_workers=outer_workers,
        inner_workers=inner_workers,
    )

    # Results are read back by position, in the same order the tasks were built.
    hess = np.empty((n_params, n_params), dtype=float)
    for pos, (row, col) in enumerate(index_pairs):
        entry = float(vals[pos])
        hess[row, col] = entry
        hess[col, row] = entry

    if np.isfinite(hess).all():
        return hess
    raise FloatingPointError("Non-finite values encountered in Hessian.")
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _build_hessian_scalar_diag(
    function: Callable[[ArrayLike], float | np.ndarray],
    theta: np.ndarray,
    method: str | None,
    outer_workers: int,
    inner_workers: int | None,
    **dk_kwargs: Any,
) -> np.ndarray:
    """Compute the Hessian diagonal of a scalar-valued function.

    Only the pure second derivatives (entries ``(i, i)``) are evaluated; no
    mixed partials are computed.

    Args:
        function: The function to be differentiated.
        theta: The parameter vector at which the Hessian is evaluated.
        method: Method name or alias (e.g., ``"adaptive"``, ``"finite"``).
        outer_workers: Number of outer parallel workers for diagonal entries.
        inner_workers: Optional inner parallelism for the differentiation engine.
        **dk_kwargs: Additional keyword arguments for
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.

    Returns:
        A 1D array representing the diagonal of the Hessian.

    Raises:
        FloatingPointError: If non-finite values are encountered.
        TypeError: If ``function`` does not return a scalar value.
    """
    n_params = int(theta.size)

    # One task per parameter, requesting entry (k, k).
    tasks: list[Tuple[Any, ...]] = []
    for k in range(n_params):
        tasks.append((function, theta, k, k, method, inner_workers, dk_kwargs))

    vals = parallel_execute(
        _hessian_component_worker,
        tasks,
        outer_workers=outer_workers,
        inner_workers=inner_workers,
    )

    diag = np.asarray(vals, dtype=float)
    if np.isfinite(diag).all():
        return diag
    raise FloatingPointError("Non-finite values encountered in Hessian diagonal.")
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _hessian_component_worker(
    function: Callable[[ArrayLike], float | np.ndarray],
    theta0: np.ndarray,
    i: int,
    j: int,
    method: str | None,
    inner_workers: int | None,
    dk_kwargs: dict,
) -> float:
    """Compute one Hessian entry and coerce it to a Python float.

    Wraps ``_hessian_component`` with an all-positional signature and
    validates that the result has exactly one element.

    Args:
        function: A function that returns a single value.
        theta0: The parameter values where the derivative is evaluated.
        i: Index of the first parameter.
        j: Index of the second parameter.
        method: Method name or alias (e.g., ``"adaptive"``, ``"finite"``).
            If ``None``, the :class:`derivkit.derivative_kit.DerivativeKit`
            default (``"adaptive"``) is used.
        inner_workers: Optional inner parallelism for the differentiation engine.
            ``None`` (or ``0``) is treated as ``1``.
        dk_kwargs: Additional keyword arguments passed to
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.

    Returns:
        A single number showing how the rate of change in one parameter
        depends on another.

    Raises:
        TypeError: If the computed component is not a single value.
    """
    # Any falsy worker count falls back to a single worker.
    workers = inner_workers or 1
    raw = _hessian_component(
        function=function,
        theta0=theta0,
        i=i,
        j=j,
        method=method,
        n_workers=workers,
        **dk_kwargs,
    )

    entry = np.asarray(raw, dtype=float)
    if entry.size == 1:
        return float(entry.item())
    raise TypeError(
        f"Hessian component must be scalar; got array with shape {entry.shape}."
    )
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _hessian_component(
    function: Callable[[ArrayLike], float | np.ndarray],
    theta0: np.ndarray,
    i: int,
    j: int,
    method: str | None = None,
    n_workers: int = 1,
    **dk_kwargs: Any,
) -> float:
    """Compute one entry ``(i, j)`` of the Hessian of a scalar-valued function.

    Two cases are handled:

    - ``i == j``: the pure second derivative with respect to parameter ``i``,
      obtained as a single order-2 differentiation.
    - ``i != j``: the mixed derivative, obtained by nesting two first-order
      differentiations. The inner one differentiates with respect to
      parameter ``i`` at a given value of parameter ``j``; the outer one
      differentiates that result with respect to parameter ``j``.

    Args:
        function: A function that returns a single value.
        theta0: The parameter values where the derivative is evaluated.
        i: Index of the first parameter.
        j: Index of the second parameter.
        method: Method name or alias (e.g., ``"adaptive"``, ``"finite"``).
            If ``None``, the :class:`derivkit.derivative_kit.DerivativeKit`
            default (``"adaptive"``) is used.
        n_workers: Optional inner parallelism for the differentiation engine.
        **dk_kwargs: Additional keyword arguments passed to
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.

    Returns:
        A single number showing how the rate of change in one parameter
        depends on another.

    Raises:
        TypeError: If ``function`` does not return a scalar value.
    """
    if i != j:
        # Mixed derivative path: bind a helper that, given a value for
        # parameter j, returns the first derivative with respect to parameter i.
        # Differentiating that helper with respect to parameter j gives the
        # mixed second derivative.
        first_deriv_vs_j = partial(
            _mixed_partial_value,
            function=function,
            theta0=theta0,
            i=i,
            j=j,
            method=method,
            n_workers=n_workers,
            dk_kwargs=dk_kwargs,
        )
        outer_kit = DerivativeKit(first_deriv_vs_j, float(theta0[j]))
        return outer_kit.differentiate(
            order=1, method=method, n_workers=n_workers, **dk_kwargs
        )

    # Pure second derivative path.
    # NOTE(review): get_partial_function presumably returns a one-argument
    # function of parameter i with the rest of theta0 held fixed -- confirm.
    slice_fn = get_partial_function(function, i, theta0)

    # Probe once at the expansion point to reject non-scalar outputs early.
    probe = np.asarray(slice_fn(float(theta0[i])), dtype=float)
    if probe.size != 1:
        raise TypeError("build_hessian() expects a scalar-valued function.")

    inner_kit = DerivativeKit(slice_fn, float(theta0[i]))
    return inner_kit.differentiate(
        order=2, method=method, n_workers=n_workers, **dk_kwargs
    )
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _mixed_partial_value(
    y: float,
    *,
    function: Callable[[ArrayLike], float | np.ndarray],
    theta0: np.ndarray,
    i: int,
    j: int,
    method: str | None,
    n_workers: int | None,
    dk_kwargs: dict,
) -> float:
    """Return the first derivative w.r.t. parameter ``i`` with parameter ``j`` set to ``y``.

    This helper does not compute the second derivative itself. It only returns
    the first derivative of the function with respect to one parameter while
    another is pinned to a given value. The caller then differentiates this
    result with respect to the pinned parameter to obtain the mixed second
    derivative.

    Args:
        y: The value to set for parameter j.
        function: A function that returns a single value.
        theta0: The parameter values where the derivative is evaluated.
            It is not modified; a floating-point copy is used internally.
        i: Index of the first parameter.
        j: Index of the second parameter.
        method: Method name or alias (e.g., ``"adaptive"``, ``"finite"``).
            If ``None``, the :class:`derivkit.derivative_kit.DerivativeKit` default
            (``"adaptive"``) is used.
        n_workers: Optional inner parallelism for the differentiation engine.
        dk_kwargs: Additional keyword arguments passed to
            :meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.

    Returns:
        The value of the partial derivative with respect to parameter i
        when parameter j is set to y.

    Raises:
        TypeError: If the first derivative is not a single value.
    """
    # Copy into a float array rather than ``theta0.copy()``: with an
    # integer-dtype ``theta0`` the assignment below would silently truncate
    # ``y`` to an integer, and the outer derivative would be taken of a
    # function that ignores fractional changes in parameter j.
    theta = np.array(theta0, dtype=float)
    theta[j] = float(y)

    # NOTE(review): get_partial_function presumably returns a one-argument
    # function of parameter i with the rest of ``theta`` held fixed -- confirm.
    partial_vec1 = get_partial_function(function, i, theta)
    kit1 = DerivativeKit(partial_vec1, float(theta[i]))
    val = kit1.differentiate(
        order=1,
        method=method,
        n_workers=n_workers,
        **dk_kwargs,
    )

    val_arr = np.asarray(val, dtype=float)
    if val_arr.size != 1:
        raise TypeError(
            f"Mixed partial derivative must be scalar; got array with shape {val_arr.shape}."
        )
    return float(val_arr.item())
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
def _build_hessian_internal(
|
|
429
|
+
function: Callable[[ArrayLike], float | np.ndarray],
|
|
430
|
+
theta0: np.ndarray,
|
|
431
|
+
*,
|
|
432
|
+
method: str | None,
|
|
433
|
+
n_workers: int,
|
|
434
|
+
diag: bool,
|
|
435
|
+
**dk_kwargs: Any,
|
|
436
|
+
) -> np.ndarray:
|
|
437
|
+
"""Core Hessian builder (internal).
|
|
438
|
+
|
|
439
|
+
Computes either the full Hessian or only its diagonal at ``theta0``.
|
|
440
|
+
Used internally by:
|
|
441
|
+
|
|
442
|
+
- ``build_hessian(...)`` → ``diag=False`` (full)
|
|
443
|
+
- ``build_hessian_diag(...)`` → ``diag=True`` (diagonal only)
|
|
444
|
+
|
|
445
|
+
Args:
|
|
446
|
+
function:
|
|
447
|
+
Callable mapping parameters to a scalar or tensor. For tensor outputs,
|
|
448
|
+
the function is flattened and one scalar Hessian (or diagonal) is
|
|
449
|
+
computed per component, then reshaped back.
|
|
450
|
+
theta0:
|
|
451
|
+
Parameter vector (1D array) at which the Hessian is evaluated.
|
|
452
|
+
method:
|
|
453
|
+
Derivative method name or alias (e.g., ``"adaptive"``, ``"finite"``).
|
|
454
|
+
If ``None``, uses the :class:`derivkit.derivative_kit.DerivativeKit`
|
|
455
|
+
default (``"adaptive"``).
|
|
456
|
+
n_workers:
|
|
457
|
+
Number of outer parallel workers (across output components / Hessian entries).
|
|
458
|
+
You may pass ``inner_workers=<int>`` in ``dk_kwargs`` to override inner parallelism.
|
|
459
|
+
diag:
|
|
460
|
+
If ``True``, compute only the diagonal entries.
|
|
461
|
+
If ``False``, compute the full Hessian.
|
|
462
|
+
**dk_kwargs:
|
|
463
|
+
Additional keyword arguments forwarded to
|
|
464
|
+
:meth:`derivkit.derivative_kit.DerivativeKit.differentiate`.
|
|
465
|
+
|
|
466
|
+
Returns:
|
|
467
|
+
If ``function(theta0)`` is scalar:
|
|
468
|
+
|
|
469
|
+
- ``diag=False``: array with shape ``(p, p)`` (full Hessian)
|
|
470
|
+
- ``diag=True``: array with shape ``(p,)`` (diagonal only)
|
|
471
|
+
|
|
472
|
+
If ``function(theta0)`` has shape ``out_shape``:
|
|
473
|
+
|
|
474
|
+
- ``diag=False``: array with shape ``(*out_shape, p, p)``
|
|
475
|
+
- ``diag=True``: array with shape ``(*out_shape, p)``
|
|
476
|
+
|
|
477
|
+
Raises:
|
|
478
|
+
FloatingPointError:
|
|
479
|
+
If non-finite values are encountered.
|
|
480
|
+
ValueError:
|
|
481
|
+
If ``theta0`` is empty.
|
|
482
|
+
TypeError:
|
|
483
|
+
If evaluating a single output component does not return a scalar.
|
|
484
|
+
|
|
485
|
+
Notes:
|
|
486
|
+
- When ``diag=True``, mixed partials are skipped for speed and memory efficiency.
|
|
487
|
+
- The inner worker count defaults to ``resolve_inner_from_outer(n_workers)`` unless
|
|
488
|
+
explicitly overridden via ``inner_workers`` in ``dk_kwargs``.
|
|
489
|
+
"""
|
|
490
|
+
theta = np.asarray(theta0, dtype=float).reshape(-1)
|
|
491
|
+
if theta.size == 0:
|
|
492
|
+
raise ValueError("theta0 must be a non-empty 1D array.")
|
|
493
|
+
|
|
494
|
+
y0 = np.asarray(function(theta))
|
|
495
|
+
out_shape = y0.shape
|
|
496
|
+
|
|
497
|
+
inner_override = dk_kwargs.pop("inner_workers", None)
|
|
498
|
+
outer = int(n_workers) if n_workers is not None else 1
|
|
499
|
+
inner = int(inner_override) if inner_override is not None else resolve_inner_from_outer(outer)
|
|
500
|
+
|
|
501
|
+
if y0.ndim == 0:
|
|
502
|
+
if diag:
|
|
503
|
+
return _build_hessian_scalar_diag(function, theta, method, outer, inner, **dk_kwargs)
|
|
504
|
+
else:
|
|
505
|
+
return _build_hessian_scalar_full(function, theta, method, outer, inner, **dk_kwargs)
|
|
506
|
+
|
|
507
|
+
# Tensor output: flatten and compute per-component Hessians.
|
|
508
|
+
# Treat the function output as a vector of length m = prod(out_shape),
|
|
509
|
+
# compute one scalar Hessian (or diagonal) per component, then reshape
|
|
510
|
+
# the stacked results back to the original output shape.
|
|
511
|
+
m = y0.size
|
|
512
|
+
tasks = [(i, theta, method, inner, diag, dk_kwargs, function) for i in range(m)]
|
|
513
|
+
vals = parallel_execute(
|
|
514
|
+
_compute_component_hessian, tasks, outer_workers=outer, inner_workers=inner
|
|
515
|
+
)
|
|
516
|
+
|
|
517
|
+
# Stack per-component results:
|
|
518
|
+
# each entry in `vals` is a Hessian of shape (p, p) or a diagonal of shape (p,).
|
|
519
|
+
arr = np.stack(vals, axis=0)
|
|
520
|
+
|
|
521
|
+
# Restore the original output layout and append parameter axes.
|
|
522
|
+
# Result shape:
|
|
523
|
+
# - (*out_shape, p, p) for full Hessians.
|
|
524
|
+
# - (*out_shape, p) for diagonals.
|
|
525
|
+
arr = arr.reshape(out_shape + arr.shape[1:])
|
|
526
|
+
if not np.isfinite(arr).all():
|
|
527
|
+
raise FloatingPointError("Non-finite values encountered in Hessian.")
|
|
528
|
+
return arr
|