pymc-extras 0.2.6__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymc_extras/deserialize.py +224 -0
- pymc_extras/inference/__init__.py +2 -2
- pymc_extras/inference/fit.py +1 -1
- pymc_extras/inference/laplace_approx/__init__.py +0 -0
- pymc_extras/inference/laplace_approx/find_map.py +347 -0
- pymc_extras/inference/laplace_approx/idata.py +392 -0
- pymc_extras/inference/laplace_approx/laplace.py +451 -0
- pymc_extras/inference/laplace_approx/scipy_interface.py +242 -0
- pymc_extras/inference/pathfinder/pathfinder.py +2 -2
- pymc_extras/linearmodel.py +3 -1
- pymc_extras/model/marginal/graph_analysis.py +4 -0
- pymc_extras/prior.py +1388 -0
- pymc_extras/statespace/core/statespace.py +78 -52
- pymc_extras/statespace/filters/kalman_smoother.py +1 -1
- {pymc_extras-0.2.6.dist-info → pymc_extras-0.3.1.dist-info}/METADATA +6 -4
- {pymc_extras-0.2.6.dist-info → pymc_extras-0.3.1.dist-info}/RECORD +18 -14
- pymc_extras/inference/find_map.py +0 -496
- pymc_extras/inference/laplace.py +0 -583
- pymc_extras/utils/pivoted_cholesky.py +0 -69
- {pymc_extras-0.2.6.dist-info → pymc_extras-0.3.1.dist-info}/WHEEL +0 -0
- {pymc_extras-0.2.6.dist-info → pymc_extras-0.3.1.dist-info}/licenses/LICENSE +0 -0
pymc_extras/inference/laplace_approx/idata.py
@@ -0,0 +1,392 @@
+from itertools import product
+from typing import Literal
+
+import arviz as az
+import numpy as np
+import pymc as pm
+import xarray as xr
+
+from arviz import dict_to_dataset
+from better_optimize.constants import minimize_method
+from pymc.backends.arviz import coords_and_dims_for_inferencedata, find_constants, find_observations
+from pymc.blocking import RaveledVars
+from pymc.util import get_default_varnames
+from scipy.optimize import OptimizeResult
+from scipy.sparse.linalg import LinearOperator
+
+
+def make_default_labels(name: str, shape: tuple[int, ...]) -> list:
+    if len(shape) == 0:
+        return [name]
+
+    return [list(range(dim)) for dim in shape]
+
+
+def make_unpacked_variable_names(names: list[str], model: pm.Model) -> list[str]:
+    coords = model.coords
+    initial_point = model.initial_point()
+
+    value_to_dim = {
+        value.name: model.named_vars_to_dims.get(model.values_to_rvs[value].name, None)
+        for value in model.value_vars
+    }
+    value_to_dim = {k: v for k, v in value_to_dim.items() if v is not None}
+
+    rv_to_dim = model.named_vars_to_dims
+    dims_dict = rv_to_dim | value_to_dim
+
+    unpacked_variable_names = []
+    for name in names:
+        shape = initial_point[name].shape
+        if shape:
+            dims = dims_dict.get(name)
+            if dims:
+                labels_by_dim = [
+                    coords[dim] if shape[i] == len(coords[dim]) else np.arange(shape[i])
+                    for i, dim in enumerate(dims)
+                ]
+            else:
+                labels_by_dim = make_default_labels(name, shape)
+            labels = product(*labels_by_dim)
+            unpacked_variable_names.extend(
+                [f"{name}[{','.join(map(str, label))}]" for label in labels]
+            )
+        else:
+            unpacked_variable_names.extend([name])
+    return unpacked_variable_names
+
+
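For intuition, here is a minimal sketch of the label format this helper produces, assuming pymc-extras 0.3.1 is installed and using the module path from the file list above. The toy model, its coords, and the variable names are illustrative assumptions, not part of the diff:

```python
import pymc as pm

from pymc_extras.inference.laplace_approx.idata import make_unpacked_variable_names

with pm.Model(coords={"city": ["nyc", "sf"]}) as m:
    beta = pm.Normal("beta", dims="city")
    sigma = pm.HalfNormal("sigma")

# Vector variables are expanded to one label per entry, using model coords
# when their lengths match the variable's shape; scalars keep their plain name.
names = make_unpacked_variable_names(["beta", "sigma_log__"], m)
print(names)  # expected: ['beta[nyc]', 'beta[sf]', 'sigma_log__']
```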
+def map_results_to_inference_data(
+    map_point: dict[str, float | int | np.ndarray],
+    model: pm.Model | None = None,
+):
+    """
+    Create an InferenceData object with the MAP point stored in the posterior group.
+
+    Unlike a typical posterior, the MAP point is a single point estimate rather than a distribution. As a result, it
+    does not have a chain or draw dimension, and is stored as a single point in the posterior group.
+
+    Parameters
+    ----------
+    map_point: dict
+        A dictionary containing the MAP point estimates for each variable. The keys should be the variable names, and
+        the values should be the corresponding MAP estimates.
+    model: Model, optional
+        A PyMC model. If None, the model is taken from the current model context.
+
+    Returns
+    -------
+    idata: az.InferenceData
+        A new InferenceData object with the MAP point stored in the posterior group.
+    """
+
+    model = pm.modelcontext(model) if model is None else model
+    coords, dims = coords_and_dims_for_inferencedata(model)
+    initial_point = model.initial_point()
+
+    # The MAP point will have both the transformed and untransformed variables, so we need to ensure that
+    # we have the correct dimensions for each variable.
+    var_name_to_value_name = {
+        rv.name: value.name
+        for rv, value in model.rvs_to_values.items()
+        if rv not in model.observed_RVs
+    }
+    dims.update(
+        {
+            value_name: dims[var_name]
+            for var_name, value_name in var_name_to_value_name.items()
+            if var_name in dims and (initial_point[value_name].shape == map_point[var_name].shape)
+        }
+    )
+
+    constrained_names = [
+        x.name for x in get_default_varnames(model.unobserved_value_vars, include_transformed=False)
+    ]
+    all_varnames = [
+        x.name for x in get_default_varnames(model.unobserved_value_vars, include_transformed=True)
+    ]
+
+    unconstrained_names = set(all_varnames) - set(constrained_names)
+
+    idata = az.from_dict(
+        posterior={
+            k: np.expand_dims(v, (0, 1)) for k, v in map_point.items() if k in constrained_names
+        },
+        coords=coords,
+        dims=dims,
+    )
+
+    if unconstrained_names:
+        unconstrained_posterior = az.from_dict(
+            posterior={
+                k: np.expand_dims(v, (0, 1))
+                for k, v in map_point.items()
+                if k in unconstrained_names
+            },
+            coords=coords,
+            dims=dims,
+        )
+
+        idata["unconstrained_posterior"] = unconstrained_posterior.posterior
+
+    return idata
+
+
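A minimal usage sketch, again assuming the module path from the file list above; the toy model is invented for illustration. `pm.find_MAP(include_transformed=True)` returns both constrained values and their unconstrained counterparts, which is the kind of input this function expects:

```python
import pymc as pm

from pymc_extras.inference.laplace_approx.idata import map_results_to_inference_data

with pm.Model() as m:
    mu = pm.Normal("mu")
    sigma = pm.HalfNormal("sigma")
    pm.Normal("y", mu=mu, sigma=sigma, observed=[1.0, 2.0, 3.0])
    map_point = pm.find_MAP(include_transformed=True)

idata = map_results_to_inference_data(map_point, model=m)
# idata.posterior holds mu and sigma with singleton chain/draw dims;
# sigma_log__ lands in the extra "unconstrained_posterior" group
```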
+def add_fit_to_inference_data(
+    idata: az.InferenceData, mu: RaveledVars, H_inv: np.ndarray, model: pm.Model | None = None
+) -> az.InferenceData:
+    """
+    Add the mean vector and covariance matrix of the Laplace approximation to an InferenceData object.
+
+    Parameters
+    ----------
+    idata: az.InferenceData
+        An InferenceData object containing the approximated posterior samples.
+    mu: RaveledVars
+        The MAP estimate of the model parameters.
+    H_inv: np.ndarray
+        The inverse Hessian matrix of the log-posterior evaluated at the MAP estimate.
+    model: Model, optional
+        A PyMC model. If None, the model is taken from the current model context.
+
+    Returns
+    -------
+    idata: az.InferenceData
+        The provided InferenceData, with the mean vector and covariance matrix added to the "fit" group.
+    """
+    model = pm.modelcontext(model) if model is None else model
+
+    variable_names, *_ = zip(*mu.point_map_info)
+
+    unpacked_variable_names = make_unpacked_variable_names(variable_names, model)
+
+    mean_dataarray = xr.DataArray(mu.data, dims=["rows"], coords={"rows": unpacked_variable_names})
+
+    data = {"mean_vector": mean_dataarray}
+
+    if H_inv is not None:
+        cov_dataarray = xr.DataArray(
+            H_inv,
+            dims=["rows", "columns"],
+            coords={"rows": unpacked_variable_names, "columns": unpacked_variable_names},
+        )
+        data["covariance_matrix"] = cov_dataarray
+
+    dataset = xr.Dataset(data)
+    idata.add_groups(fit=dataset)
+
+    return idata
+
+
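A sketch of how the "fit" group might be populated. The identity matrix standing in for a real inverse Hessian and the use of `DictToArrayBijection` (from `pymc.blocking`) to fabricate a `RaveledVars` are illustrative assumptions:

```python
import numpy as np
import pymc as pm
from pymc.blocking import DictToArrayBijection

from pymc_extras.inference.laplace_approx.idata import (
    add_fit_to_inference_data,
    map_results_to_inference_data,
)

with pm.Model() as m:
    mu = pm.Normal("mu")
    pm.Normal("y", mu=mu, sigma=1.0, observed=[1.0, 2.0, 3.0])
    map_point = pm.find_MAP()

idata = map_results_to_inference_data(map_point, model=m)

# Ravel the MAP point into a RaveledVars; np.eye(1) is a placeholder H_inv
raveled = DictToArrayBijection.map({"mu": np.asarray(map_point["mu"])})
idata = add_fit_to_inference_data(idata, mu=raveled, H_inv=np.eye(1), model=m)

idata.fit["mean_vector"]        # indexed by unpacked variable names ("mu")
idata.fit["covariance_matrix"]  # rows/columns share the same labels
```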
+def add_data_to_inference_data(
+    idata: az.InferenceData,
+    progressbar: bool = True,
+    model: pm.Model | None = None,
+    compile_kwargs: dict | None = None,
+) -> az.InferenceData:
+    """
+    Add observed and constant data to an InferenceData object.
+
+    Parameters
+    ----------
+    idata: az.InferenceData
+        An InferenceData object containing the approximated posterior samples.
+    progressbar: bool
+        Whether to display a progress bar during computations. Default is True.
+    model: Model, optional
+        A PyMC model. If None, the model is taken from the current model context.
+    compile_kwargs: dict, optional
+        Additional keyword arguments to pass to pytensor.function.
+
+    Returns
+    -------
+    idata: az.InferenceData
+        The provided InferenceData, with observed and constant data added.
+    """
+    model = pm.modelcontext(model) if model is None else model
+
+    if model.deterministics:
+        expand_dims = {}
+        if "chain" not in idata.posterior.coords:
+            expand_dims["chain"] = [0]
+        if "draw" not in idata.posterior.coords:
+            expand_dims["draw"] = [0]
+
+        idata.posterior = pm.compute_deterministics(
+            idata.posterior.expand_dims(expand_dims),
+            model=model,
+            merge_dataset=True,
+            progressbar=progressbar,
+            compile_kwargs=compile_kwargs,
+        )
+
+    coords, dims = coords_and_dims_for_inferencedata(model)
+
+    observed_data = dict_to_dataset(
+        find_observations(model),
+        library=pm,
+        coords=coords,
+        dims=dims,
+        default_dims=[],
+    )
+
+    constant_data = dict_to_dataset(
+        find_constants(model),
+        library=pm,
+        coords=coords,
+        dims=dims,
+        default_dims=[],
+    )
+
+    idata.add_groups(
+        {"observed_data": observed_data, "constant_data": constant_data},
+        coords=coords,
+        dims=dims,
+    )
+
+    return idata
+
+
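A short sketch of attaching the data groups, following the same illustrative pattern; the `pm.Data` container is included here only so the constant_data group has something to hold:

```python
import pymc as pm

from pymc_extras.inference.laplace_approx.idata import (
    add_data_to_inference_data,
    map_results_to_inference_data,
)

with pm.Model() as m:
    x = pm.Data("x", [0.0, 1.0, 2.0])
    mu = pm.Normal("mu")
    pm.Normal("y", mu=mu + x, sigma=1.0, observed=[1.0, 2.0, 3.0])
    map_point = pm.find_MAP()

idata = map_results_to_inference_data(map_point, model=m)
idata = add_data_to_inference_data(idata, progressbar=False, model=m)
# idata.observed_data now holds y and idata.constant_data holds x; if the
# model had Deterministics, they would be computed and merged into the
# posterior first, with singleton chain/draw dims added as needed
```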
+def optimizer_result_to_dataset(
+    result: OptimizeResult,
+    method: minimize_method | Literal["basinhopping"],
+    mu: RaveledVars | None = None,
+    model: pm.Model | None = None,
+) -> xr.Dataset:
+    """
+    Convert an OptimizeResult object to an xarray Dataset object.
+
+    Parameters
+    ----------
+    result: OptimizeResult
+        The result of the optimization process.
+    method: minimize_method or "basinhopping"
+        The optimization method used.
+    mu: RaveledVars
+        The MAP estimate of the model parameters, used to label the rows and columns of the output arrays.
+    model: Model, optional
+        A PyMC model. If None, the model is taken from the current model context.
+
+    Returns
+    -------
+    dataset: xr.Dataset
+        An xarray Dataset containing the optimization results.
+    """
+    if not isinstance(result, OptimizeResult):
+        raise TypeError("result must be an instance of OptimizeResult")
+
+    model = pm.modelcontext(model) if model is None else model
+    variable_names, *_ = zip(*mu.point_map_info)
+    unpacked_variable_names = make_unpacked_variable_names(variable_names, model)
+
+    data_vars = {}
+
+    if hasattr(result, "lowest_optimization_result"):
+        # If we did basinhopping, there's a result nested inside the result. Pop it out and collapse
+        # the two, overwriting outer keys with the inner ones.
+        inner_res = result.pop("lowest_optimization_result")
+        for key in inner_res.keys():
+            result[key] = inner_res[key]
+
+    if hasattr(result, "x"):
+        data_vars["x"] = xr.DataArray(
+            result.x, dims=["variables"], coords={"variables": unpacked_variable_names}
+        )
+    if hasattr(result, "fun"):
+        data_vars["fun"] = xr.DataArray(result.fun, dims=[])
+    if hasattr(result, "success"):
+        data_vars["success"] = xr.DataArray(result.success, dims=[])
+    if hasattr(result, "message"):
+        data_vars["message"] = xr.DataArray(str(result.message), dims=[])
+    if hasattr(result, "jac") and result.jac is not None:
+        jac = np.asarray(result.jac)
+        if jac.ndim == 1:
+            data_vars["jac"] = xr.DataArray(
+                jac, dims=["variables"], coords={"variables": unpacked_variable_names}
+            )
+        else:
+            data_vars["jac"] = xr.DataArray(
+                jac,
+                dims=["variables", "variables_aux"],
+                coords={
+                    "variables": unpacked_variable_names,
+                    "variables_aux": unpacked_variable_names,
+                },
+            )
+
+    if hasattr(result, "hess_inv") and result.hess_inv is not None:
+        hess_inv = result.hess_inv
+        if isinstance(hess_inv, LinearOperator):
+            n = hess_inv.shape[0]
+            eye = np.eye(n)
+            hess_inv_mat = np.column_stack([hess_inv.matvec(eye[:, i]) for i in range(n)])
+            hess_inv = hess_inv_mat
+        else:
+            hess_inv = np.asarray(hess_inv)
+        data_vars["hess_inv"] = xr.DataArray(
+            hess_inv,
+            dims=["variables", "variables_aux"],
+            coords={"variables": unpacked_variable_names, "variables_aux": unpacked_variable_names},
+        )
+
+    if hasattr(result, "nit"):
+        data_vars["nit"] = xr.DataArray(result.nit, dims=[])
+    if hasattr(result, "nfev"):
+        data_vars["nfev"] = xr.DataArray(result.nfev, dims=[])
+    if hasattr(result, "njev"):
+        data_vars["njev"] = xr.DataArray(result.njev, dims=[])
+    if hasattr(result, "status"):
+        data_vars["status"] = xr.DataArray(result.status, dims=[])
+
+    # Add any other fields present in result
+    for key, value in result.items():
+        if key in data_vars:
+            continue  # already added
+        if value is None:
+            continue
+        arr = np.asarray(value)
+
+        # TODO: We can probably do something smarter here with a dictionary of all possible values and their expected
+        # dimensions.
+        dims = [f"{key}_dim_{i}" for i in range(arr.ndim)]
+        data_vars[key] = xr.DataArray(
+            arr,
+            dims=dims,
+            coords={f"{key}_dim_{i}": np.arange(arr.shape[i]) for i in range(len(dims))},
+        )
+
+    data_vars["method"] = xr.DataArray(np.array(method), dims=[])
+
+    return xr.Dataset(data_vars)
+
+
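To see what the resulting Dataset looks like, here is a toy sketch. The quadratic objective stands in for a real negative log-posterior, and `DictToArrayBijection` is only used to fabricate the `RaveledVars` that supplies variable labels; both are assumptions for illustration:

```python
import numpy as np
import pymc as pm
from pymc.blocking import DictToArrayBijection
from scipy.optimize import minimize

from pymc_extras.inference.laplace_approx.idata import optimizer_result_to_dataset

with pm.Model() as m:
    mu = pm.Normal("mu")

# A stand-in OptimizeResult over a 1-D problem matching the model's single parameter
result = minimize(lambda x: ((x - 2.0) ** 2).sum(), x0=np.zeros(1), method="BFGS")
raveled = DictToArrayBijection.map({"mu": np.zeros(())})

ds = optimizer_result_to_dataset(result, method="BFGS", mu=raveled, model=m)
# ds contains x, fun, jac, hess_inv, nit, ... with "variables" coords ["mu"]
```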
+def add_optimizer_result_to_inference_data(
+    idata: az.InferenceData,
+    result: OptimizeResult,
+    method: minimize_method | Literal["basinhopping"],
+    mu: RaveledVars | None = None,
+    model: pm.Model | None = None,
+) -> az.InferenceData:
+    """
+    Add the optimization result to an InferenceData object.
+
+    Parameters
+    ----------
+    idata: az.InferenceData
+        An InferenceData object containing the approximated posterior samples.
+    result: OptimizeResult
+        The result of the optimization process.
+    method: minimize_method or "basinhopping"
+        The optimization method used.
+    mu: RaveledVars, optional
+        The MAP estimate of the model parameters.
+    model: Model, optional
+        A PyMC model. If None, the model is taken from the current model context.
+
+    Returns
+    -------
+    idata: az.InferenceData
+        The provided InferenceData, with the optimization results added to the "optimizer_result" group.
+    """
+    dataset = optimizer_result_to_dataset(result, method=method, mu=mu, model=model)
+    idata.add_groups({"optimizer_result": dataset})
+
+    return idata
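Putting the pieces together, a hypothetical end-to-end flow through these helpers might look like the following. As in the sketches above, the toy model, the `pm.Data` container, the stand-in optimizer result, and the identity H_inv are all illustrative assumptions:

```python
import numpy as np
import pymc as pm
from pymc.blocking import DictToArrayBijection
from scipy.optimize import minimize

from pymc_extras.inference.laplace_approx.idata import (
    add_data_to_inference_data,
    add_fit_to_inference_data,
    add_optimizer_result_to_inference_data,
    map_results_to_inference_data,
)

with pm.Model() as m:
    x = pm.Data("x", [0.0, 1.0, 2.0])
    mu = pm.Normal("mu")
    pm.Normal("y", mu=mu + x, sigma=1.0, observed=[1.0, 2.0, 3.0])
    map_point = pm.find_MAP()

raveled = DictToArrayBijection.map({"mu": np.asarray(map_point["mu"])})
result = minimize(lambda v: ((v - 1.0) ** 2).sum(), x0=np.zeros(1), method="BFGS")

idata = map_results_to_inference_data(map_point, model=m)
idata = add_fit_to_inference_data(idata, mu=raveled, H_inv=np.eye(1), model=m)
idata = add_data_to_inference_data(idata, progressbar=False, model=m)
idata = add_optimizer_result_to_inference_data(idata, result, method="BFGS", mu=raveled, model=m)

# Resulting groups: posterior, fit, observed_data, constant_data, optimizer_result
print(idata)
```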