pymc-extras 0.2.7__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. pymc_extras/inference/__init__.py +2 -2
  2. pymc_extras/inference/fit.py +1 -1
  3. pymc_extras/inference/laplace_approx/__init__.py +0 -0
  4. pymc_extras/inference/laplace_approx/find_map.py +354 -0
  5. pymc_extras/inference/laplace_approx/idata.py +393 -0
  6. pymc_extras/inference/laplace_approx/laplace.py +453 -0
  7. pymc_extras/inference/laplace_approx/scipy_interface.py +242 -0
  8. pymc_extras/inference/pathfinder/pathfinder.py +3 -4
  9. pymc_extras/linearmodel.py +3 -1
  10. pymc_extras/model/marginal/graph_analysis.py +4 -0
  11. pymc_extras/prior.py +38 -6
  12. pymc_extras/statespace/core/statespace.py +78 -52
  13. pymc_extras/statespace/filters/kalman_smoother.py +1 -1
  14. pymc_extras/statespace/models/structural/__init__.py +21 -0
  15. pymc_extras/statespace/models/structural/components/__init__.py +0 -0
  16. pymc_extras/statespace/models/structural/components/autoregressive.py +188 -0
  17. pymc_extras/statespace/models/structural/components/cycle.py +305 -0
  18. pymc_extras/statespace/models/structural/components/level_trend.py +257 -0
  19. pymc_extras/statespace/models/structural/components/measurement_error.py +137 -0
  20. pymc_extras/statespace/models/structural/components/regression.py +228 -0
  21. pymc_extras/statespace/models/structural/components/seasonality.py +445 -0
  22. pymc_extras/statespace/models/structural/core.py +900 -0
  23. pymc_extras/statespace/models/structural/utils.py +16 -0
  24. pymc_extras/statespace/models/utilities.py +285 -0
  25. pymc_extras/statespace/utils/constants.py +4 -4
  26. pymc_extras/statespace/utils/data_tools.py +3 -2
  27. {pymc_extras-0.2.7.dist-info → pymc_extras-0.4.0.dist-info}/METADATA +6 -6
  28. {pymc_extras-0.2.7.dist-info → pymc_extras-0.4.0.dist-info}/RECORD +30 -18
  29. pymc_extras/inference/find_map.py +0 -496
  30. pymc_extras/inference/laplace.py +0 -583
  31. pymc_extras/statespace/models/structural.py +0 -1679
  32. {pymc_extras-0.2.7.dist-info → pymc_extras-0.4.0.dist-info}/WHEEL +0 -0
  33. {pymc_extras-0.2.7.dist-info → pymc_extras-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,393 @@
+ from itertools import product
+ from typing import Literal
+
+ import arviz as az
+ import numpy as np
+ import pymc as pm
+ import xarray as xr
+
+ from arviz import dict_to_dataset
+ from better_optimize.constants import minimize_method
+ from pymc.backends.arviz import coords_and_dims_for_inferencedata, find_constants, find_observations
+ from pymc.blocking import RaveledVars
+ from pymc.util import get_default_varnames
+ from scipy.optimize import OptimizeResult
+ from scipy.sparse.linalg import LinearOperator
+
+
+ def make_default_labels(name: str, shape: tuple[int, ...]) -> list:
+     if len(shape) == 0:
+         return [name]
+
+     return [list(range(dim)) for dim in shape]
+
+
+ def make_unpacked_variable_names(names: list[str], model: pm.Model) -> list[str]:
+     coords = model.coords
+     initial_point = model.initial_point()
+
+     value_to_dim = {
+         value.name: model.named_vars_to_dims.get(model.values_to_rvs[value].name, None)
+         for value in model.value_vars
+     }
+     value_to_dim = {k: v for k, v in value_to_dim.items() if v is not None}
+
+     rv_to_dim = model.named_vars_to_dims
+     dims_dict = rv_to_dim | value_to_dim
+
+     unpacked_variable_names = []
+     for name in names:
+         shape = initial_point[name].shape
+         if shape:
+             dims = dims_dict.get(name)
+             if dims:
+                 labels_by_dim = [
+                     coords[dim] if shape[i] == len(coords[dim]) else np.arange(shape[i])
+                     for i, dim in enumerate(dims)
+                 ]
+             else:
+                 labels_by_dim = make_default_labels(name, shape)
+             labels = product(*labels_by_dim)
+             unpacked_variable_names.extend(
+                 [f"{name}[{','.join(map(str, label))}]" for label in labels]
+             )
+         else:
+             unpacked_variable_names.extend([name])
+     return unpacked_variable_names
+
+
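For orientation, make_unpacked_variable_names flattens each array-valued variable into one label per scalar entry, reusing model coords whenever the coord length matches the array dimension. A minimal sketch of the expected output, using a hypothetical toy model that is not part of this diff:

    import pymc as pm

    with pm.Model(coords={"city": ["osaka", "kyoto"]}) as m:
        mu = pm.Normal("mu", dims="city")
        sigma = pm.HalfNormal("sigma")

    # Names are value-variable names, i.e. transformed where a transform exists:
    make_unpacked_variable_names(["mu", "sigma_log__"], m)
    # -> ["mu[osaka]", "mu[kyoto]", "sigma_log__"]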
+ def map_results_to_inference_data(
+     map_point: dict[str, float | int | np.ndarray],
+     model: pm.Model | None = None,
+     include_transformed: bool = True,
+ ):
+     """
+     Build an InferenceData object whose posterior group holds a MAP point estimate.
+
+     Unlike a typical posterior, the MAP point is a single point estimate rather than a distribution, so it is
+     stored in the posterior group as a single point with singleton chain and draw dimensions.
+
+     Parameters
+     ----------
+     map_point: dict
+         A dictionary containing the MAP point estimates for each variable. The keys should be the variable names,
+         and the values should be the corresponding MAP estimates.
+     model: Model, optional
+         A PyMC model. If None, the model is taken from the current model context.
+     include_transformed: bool
+         Whether to also store transformed (unconstrained) variables in the unconstrained_posterior group.
+         Default is True.
+
+     Returns
+     -------
+     idata: az.InferenceData
+         A new InferenceData object with the MAP point in the posterior group and, if requested, an
+         unconstrained_posterior group.
+     """
+
+     model = pm.modelcontext(model) if model is None else model
+     coords, dims = coords_and_dims_for_inferencedata(model)
+     initial_point = model.initial_point()
+
+     # The MAP point will have both the transformed and untransformed variables, so we need to ensure that
+     # we have the correct dimensions for each variable.
+     var_name_to_value_name = {
+         rv.name: value.name
+         for rv, value in model.rvs_to_values.items()
+         if rv not in model.observed_RVs
+     }
+     dims.update(
+         {
+             value_name: dims[var_name]
+             for var_name, value_name in var_name_to_value_name.items()
+             if var_name in dims and (initial_point[value_name].shape == map_point[var_name].shape)
+         }
+     )
+
+     constrained_names = [
+         x.name for x in get_default_varnames(model.unobserved_value_vars, include_transformed=False)
+     ]
+     all_varnames = [
+         x.name for x in get_default_varnames(model.unobserved_value_vars, include_transformed=True)
+     ]
+
+     unconstrained_names = set(all_varnames) - set(constrained_names)
+
+     idata = az.from_dict(
+         posterior={
+             k: np.expand_dims(v, (0, 1)) for k, v in map_point.items() if k in constrained_names
+         },
+         coords=coords,
+         dims=dims,
+     )
+
+     if unconstrained_names and include_transformed:
+         unconstrained_posterior = az.from_dict(
+             posterior={
+                 k: np.expand_dims(v, (0, 1))
+                 for k, v in map_point.items()
+                 if k in unconstrained_names
+             },
+             coords=coords,
+             dims=dims,
+         )
+
+         idata["unconstrained_posterior"] = unconstrained_posterior.posterior
+
+     return idata
+
+
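A hedged usage sketch, continuing the hypothetical toy model above. Here pm.find_MAP merely stands in for whatever optimizer produced the point dictionary (the new find_map.py in this diff serves that role in the package itself):

    with m:
        map_point = pm.find_MAP()  # {"mu": ..., "sigma": ..., "sigma_log__": ...}

    idata = map_results_to_inference_data(map_point, model=m)
    idata.posterior["mu"].shape          # (1, 1, 2): singleton chain and draw dims
    idata.unconstrained_posterior        # holds "sigma_log__", since include_transformed=True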
+ def add_fit_to_inference_data(
+     idata: az.InferenceData, mu: RaveledVars, H_inv: np.ndarray | None, model: pm.Model | None = None
+ ) -> az.InferenceData:
+     """
+     Add the mean vector and covariance matrix of the Laplace approximation to an InferenceData object.
+
+     Parameters
+     ----------
+     idata: az.InferenceData
+         An InferenceData object containing the approximated posterior samples.
+     mu: RaveledVars
+         The MAP estimate of the model parameters.
+     H_inv: np.ndarray, optional
+         The inverse Hessian matrix of the log-posterior evaluated at the MAP estimate. If None, only the mean
+         vector is stored.
+     model: Model, optional
+         A PyMC model. If None, the model is taken from the current model context.
+
+     Returns
+     -------
+     idata: az.InferenceData
+         The provided InferenceData, with the mean vector and covariance matrix added to the "fit" group.
+     """
+     model = pm.modelcontext(model) if model is None else model
+
+     variable_names, *_ = zip(*mu.point_map_info)
+
+     unpacked_variable_names = make_unpacked_variable_names(variable_names, model)
+
+     mean_dataarray = xr.DataArray(mu.data, dims=["rows"], coords={"rows": unpacked_variable_names})
+
+     data = {"mean_vector": mean_dataarray}
+
+     if H_inv is not None:
+         cov_dataarray = xr.DataArray(
+             H_inv,
+             dims=["rows", "columns"],
+             coords={"rows": unpacked_variable_names, "columns": unpacked_variable_names},
+         )
+         data["covariance_matrix"] = cov_dataarray
+
+     dataset = xr.Dataset(data)
+     idata.add_groups(fit=dataset)
+
+     return idata
+
+
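The "fit" group then encodes the Gaussian Laplace approximation N(mean_vector, covariance_matrix) over the raveled parameter vector. A sketch of reading it back out, assuming the row labels produced for the toy model above (mu_raveled and H_inv are hypothetical names for the RaveledVars and inverse Hessian returned by the optimization step):

    idata = add_fit_to_inference_data(idata, mu=mu_raveled, H_inv=H_inv, model=m)
    idata.fit["mean_vector"].sel(rows="mu[osaka]")
    idata.fit["covariance_matrix"].sel(rows="mu[osaka]", columns="sigma_log__")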
+ def add_data_to_inference_data(
+     idata: az.InferenceData,
+     progressbar: bool = True,
+     model: pm.Model | None = None,
+     compile_kwargs: dict | None = None,
+ ) -> az.InferenceData:
+     """
+     Add observed and constant data to an InferenceData object.
+
+     Parameters
+     ----------
+     idata: az.InferenceData
+         An InferenceData object containing the approximated posterior samples.
+     progressbar: bool
+         Whether to display a progress bar during computations. Default is True.
+     model: Model, optional
+         A PyMC model. If None, the model is taken from the current model context.
+     compile_kwargs: dict, optional
+         Additional keyword arguments to pass to pytensor.function.
+
+     Returns
+     -------
+     idata: az.InferenceData
+         The provided InferenceData, with observed and constant data added.
+     """
+     model = pm.modelcontext(model) if model is None else model
+
+     if model.deterministics:
+         expand_dims = {}
+         if "chain" not in idata.posterior.coords:
+             expand_dims["chain"] = [0]
+         if "draw" not in idata.posterior.coords:
+             expand_dims["draw"] = [0]
+
+         idata.posterior = pm.compute_deterministics(
+             idata.posterior.expand_dims(expand_dims),
+             model=model,
+             merge_dataset=True,
+             progressbar=progressbar,
+             compile_kwargs=compile_kwargs,
+         )
+
+     coords, dims = coords_and_dims_for_inferencedata(model)
+
+     observed_data = dict_to_dataset(
+         find_observations(model),
+         library=pm,
+         coords=coords,
+         dims=dims,
+         default_dims=[],
+     )
+
+     constant_data = dict_to_dataset(
+         find_constants(model),
+         library=pm,
+         coords=coords,
+         dims=dims,
+         default_dims=[],
+     )
+
+     idata.add_groups(
+         {"observed_data": observed_data, "constant_data": constant_data},
+         coords=coords,
+         dims=dims,
+     )
+
+     return idata
+
+
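Note the expand_dims guard: a MAP-only posterior built by map_results_to_inference_data already carries singleton chain/draw dims, but this keeps pm.compute_deterministics working for any posterior missing them. Usage is a one-liner (a sketch, assuming the model defines observed and/or constant data):

    idata = add_data_to_inference_data(idata, progressbar=False, model=m)
    # adds the observed_data and constant_data groups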
+ def optimizer_result_to_dataset(
+     result: OptimizeResult,
+     method: minimize_method | Literal["basinhopping"],
+     mu: RaveledVars | None = None,
+     model: pm.Model | None = None,
+ ) -> xr.Dataset:
+     """
+     Convert an OptimizeResult object to an xarray Dataset object.
+
+     Parameters
+     ----------
+     result: OptimizeResult
+         The result of the optimization process.
+     method: minimize_method or "basinhopping"
+         The optimization method used.
+     mu: RaveledVars, optional
+         The MAP estimate of the model parameters, used to label vector- and matrix-valued entries such as x, jac,
+         and hess_inv.
+     model: Model, optional
+         A PyMC model. If None, the model is taken from the current model context.
+
+     Returns
+     -------
+     dataset: xr.Dataset
+         An xarray Dataset containing the optimization results.
+     """
+     if not isinstance(result, OptimizeResult):
+         raise TypeError("result must be an instance of OptimizeResult")
+     if mu is None:
+         # mu carries the variable names and shapes; without it the outputs cannot be labeled
+         raise ValueError("mu is required to label the optimizer results")
+
+     model = pm.modelcontext(model) if model is None else model
+     variable_names, *_ = zip(*mu.point_map_info)
+     unpacked_variable_names = make_unpacked_variable_names(variable_names, model)
+
+     data_vars = {}
+
+     if hasattr(result, "lowest_optimization_result"):
+         # If we did basinhopping, there's a result nested inside the result. We want to pop this out and collapse
+         # the two, overwriting outer keys with the inner keys.
+         inner_res = result.pop("lowest_optimization_result")
+         for key in inner_res.keys():
+             result[key] = inner_res[key]
+
+     if hasattr(result, "x"):
+         data_vars["x"] = xr.DataArray(
+             result.x, dims=["variables"], coords={"variables": unpacked_variable_names}
+         )
+     if hasattr(result, "fun"):
+         data_vars["fun"] = xr.DataArray(result.fun, dims=[])
+     if hasattr(result, "success"):
+         data_vars["success"] = xr.DataArray(result.success, dims=[])
+     if hasattr(result, "message"):
+         data_vars["message"] = xr.DataArray(str(result.message), dims=[])
+     if hasattr(result, "jac") and result.jac is not None:
+         jac = np.asarray(result.jac)
+         if jac.ndim == 1:
+             data_vars["jac"] = xr.DataArray(
+                 jac, dims=["variables"], coords={"variables": unpacked_variable_names}
+             )
+         else:
+             data_vars["jac"] = xr.DataArray(
+                 jac,
+                 dims=["variables", "variables_aux"],
+                 coords={
+                     "variables": unpacked_variable_names,
+                     "variables_aux": unpacked_variable_names,
+                 },
+             )
+
+     if hasattr(result, "hess_inv") and result.hess_inv is not None:
+         hess_inv = result.hess_inv
+         if isinstance(hess_inv, LinearOperator):
+             # Materialize the LinearOperator as a dense matrix, one column at a time
+             n = hess_inv.shape[0]
+             eye = np.eye(n)
+             hess_inv_mat = np.column_stack([hess_inv.matvec(eye[:, i]) for i in range(n)])
+             hess_inv = hess_inv_mat
+         else:
+             hess_inv = np.asarray(hess_inv)
+         data_vars["hess_inv"] = xr.DataArray(
+             hess_inv,
+             dims=["variables", "variables_aux"],
+             coords={"variables": unpacked_variable_names, "variables_aux": unpacked_variable_names},
+         )
+
+     if hasattr(result, "nit"):
+         data_vars["nit"] = xr.DataArray(result.nit, dims=[])
+     if hasattr(result, "nfev"):
+         data_vars["nfev"] = xr.DataArray(result.nfev, dims=[])
+     if hasattr(result, "njev"):
+         data_vars["njev"] = xr.DataArray(result.njev, dims=[])
+     if hasattr(result, "status"):
+         data_vars["status"] = xr.DataArray(result.status, dims=[])
+
+     # Add any other fields present in result
+     for key, value in result.items():
+         if key in data_vars:
+             continue  # already added
+         if value is None:
+             continue
+         arr = np.asarray(value)
+
+         # TODO: We can probably do something smarter here with a dictionary of all possible values and their
+         # expected dimensions.
+         dims = [f"{key}_dim_{i}" for i in range(arr.ndim)]
+         data_vars[key] = xr.DataArray(
+             arr,
+             dims=dims,
+             coords={f"{key}_dim_{i}": np.arange(arr.shape[i]) for i in range(len(dims))},
+         )
+
+     data_vars["method"] = xr.DataArray(np.array(method), dims=[])
+
+     return xr.Dataset(data_vars)
+
+
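A sketch of inspecting the resulting Dataset for a typical scipy result (field availability varies by method; result and mu_raveled are the hypothetical outputs of the optimization step, as above):

    ds = optimizer_result_to_dataset(result, method="BFGS", mu=mu_raveled, model=m)
    ds["success"].item(), ds["fun"].item(), ds["nit"].item()
    ds["x"].sel(variables="mu[osaka]")  # labeled with the unpacked variable names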
+ def add_optimizer_result_to_inference_data(
+     idata: az.InferenceData,
+     result: OptimizeResult,
+     method: minimize_method | Literal["basinhopping"],
+     mu: RaveledVars | None = None,
+     model: pm.Model | None = None,
+ ) -> az.InferenceData:
+     """
+     Add the optimization result to an InferenceData object.
+
+     Parameters
+     ----------
+     idata: az.InferenceData
+         An InferenceData object containing the approximated posterior samples.
+     result: OptimizeResult
+         The result of the optimization process.
+     method: minimize_method or "basinhopping"
+         The optimization method used.
+     mu: RaveledVars, optional
+         The MAP estimate of the model parameters.
+     model: Model, optional
+         A PyMC model. If None, the model is taken from the current model context.
+
+     Returns
+     -------
+     idata: az.InferenceData
+         The provided InferenceData, with the optimization results added to the "optimizer_result" group.
+     """
+     dataset = optimizer_result_to_dataset(result, method=method, mu=mu, model=model)
+     idata.add_groups({"optimizer_result": dataset})
+
+     return idata
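
Taken together, these helpers compose into the InferenceData assembly used downstream. Roughly how the new laplace.py in this diff chains them (a sketch; the exact call sequence there may differ, and map_point, mu_raveled, H_inv, and result are the hypothetical optimizer outputs from the examples above):

    idata = map_results_to_inference_data(map_point, model=m)
    idata = add_fit_to_inference_data(idata, mu=mu_raveled, H_inv=H_inv, model=m)
    idata = add_data_to_inference_data(idata, model=m)
    idata = add_optimizer_result_to_inference_data(
        idata, result, method="BFGS", mu=mu_raveled, model=m
    )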