pymc-extras 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymc_extras/__init__.py +5 -1
- pymc_extras/distributions/continuous.py +3 -2
- pymc_extras/distributions/discrete.py +3 -1
- pymc_extras/inference/find_map.py +62 -17
- pymc_extras/inference/laplace.py +10 -7
- pymc_extras/statespace/core/statespace.py +191 -52
- pymc_extras/statespace/filters/distributions.py +15 -16
- pymc_extras/statespace/filters/kalman_filter.py +1 -18
- pymc_extras/statespace/filters/kalman_smoother.py +2 -6
- pymc_extras/statespace/models/ETS.py +10 -0
- pymc_extras/statespace/models/SARIMAX.py +26 -5
- pymc_extras/statespace/models/VARMAX.py +12 -2
- pymc_extras/statespace/models/structural.py +18 -5
- pymc_extras-0.2.6.dist-info/METADATA +318 -0
- pymc_extras-0.2.6.dist-info/RECORD +65 -0
- {pymc_extras-0.2.5.dist-info → pymc_extras-0.2.6.dist-info}/WHEEL +1 -2
- pymc_extras/version.py +0 -11
- pymc_extras/version.txt +0 -1
- pymc_extras-0.2.5.dist-info/METADATA +0 -112
- pymc_extras-0.2.5.dist-info/RECORD +0 -108
- pymc_extras-0.2.5.dist-info/top_level.txt +0 -2
- tests/__init__.py +0 -13
- tests/distributions/__init__.py +0 -19
- tests/distributions/test_continuous.py +0 -185
- tests/distributions/test_discrete.py +0 -210
- tests/distributions/test_discrete_markov_chain.py +0 -258
- tests/distributions/test_multivariate.py +0 -304
- tests/distributions/test_transform.py +0 -77
- tests/model/__init__.py +0 -0
- tests/model/marginal/__init__.py +0 -0
- tests/model/marginal/test_distributions.py +0 -132
- tests/model/marginal/test_graph_analysis.py +0 -182
- tests/model/marginal/test_marginal_model.py +0 -967
- tests/model/test_model_api.py +0 -38
- tests/statespace/__init__.py +0 -0
- tests/statespace/test_ETS.py +0 -411
- tests/statespace/test_SARIMAX.py +0 -405
- tests/statespace/test_VARMAX.py +0 -184
- tests/statespace/test_coord_assignment.py +0 -181
- tests/statespace/test_distributions.py +0 -270
- tests/statespace/test_kalman_filter.py +0 -326
- tests/statespace/test_representation.py +0 -175
- tests/statespace/test_statespace.py +0 -872
- tests/statespace/test_statespace_JAX.py +0 -156
- tests/statespace/test_structural.py +0 -836
- tests/statespace/utilities/__init__.py +0 -0
- tests/statespace/utilities/shared_fixtures.py +0 -9
- tests/statespace/utilities/statsmodel_local_level.py +0 -42
- tests/statespace/utilities/test_helpers.py +0 -310
- tests/test_blackjax_smc.py +0 -222
- tests/test_find_map.py +0 -103
- tests/test_histogram_approximation.py +0 -109
- tests/test_laplace.py +0 -281
- tests/test_linearmodel.py +0 -208
- tests/test_model_builder.py +0 -306
- tests/test_pathfinder.py +0 -297
- tests/test_pivoted_cholesky.py +0 -24
- tests/test_printing.py +0 -98
- tests/test_prior_from_trace.py +0 -172
- tests/test_splines.py +0 -77
- tests/utils.py +0 -0
- {pymc_extras-0.2.5.dist-info → pymc_extras-0.2.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import warnings
|
|
2
3
|
|
|
3
4
|
from collections.abc import Callable, Sequence
|
|
4
|
-
from typing import Any
|
|
5
|
+
from typing import Any, Literal
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
7
8
|
import pandas as pd
|
|
@@ -14,7 +15,6 @@ from pymc.model import modelcontext
|
|
|
14
15
|
from pymc.model.transform.optimization import freeze_dims_and_data
|
|
15
16
|
from pymc.util import RandomState
|
|
16
17
|
from pytensor import Variable, graph_replace
|
|
17
|
-
from pytensor.compile import get_mode
|
|
18
18
|
from rich.box import SIMPLE_HEAD
|
|
19
19
|
from rich.console import Console
|
|
20
20
|
from rich.table import Table
|
|
@@ -99,6 +99,13 @@ class PyMCStateSpace:
|
|
|
99
99
|
compute the observation errors. If False, these errors are deterministically zero; if True, they are sampled
|
|
100
100
|
from a multivariate normal.
|
|
101
101
|
|
|
102
|
+
mode: str or Mode, optional
|
|
103
|
+
Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and
|
|
104
|
+
``forecast``. The mode does **not** affect calls to ``pm.sample``.
|
|
105
|
+
|
|
106
|
+
Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument
|
|
107
|
+
to all sampling methods.
|
|
108
|
+
|
|
102
109
|
Notes
|
|
103
110
|
-----
|
|
104
111
|
Based on the statsmodels statespace implementation https://github.com/statsmodels/statsmodels/blob/main/statsmodels/tsa/statespace/representation.py,
|
|
@@ -221,8 +228,8 @@ class PyMCStateSpace:
|
|
|
221
228
|
filter_type: str = "standard",
|
|
222
229
|
verbose: bool = True,
|
|
223
230
|
measurement_error: bool = False,
|
|
231
|
+
mode: str | None = None,
|
|
224
232
|
):
|
|
225
|
-
self._fit_mode: str | None = None
|
|
226
233
|
self._fit_coords: dict[str, Sequence[str]] | None = None
|
|
227
234
|
self._fit_dims: dict[str, Sequence[str]] | None = None
|
|
228
235
|
self._fit_data: pt.TensorVariable | None = None
|
|
@@ -237,6 +244,7 @@ class PyMCStateSpace:
|
|
|
237
244
|
self.k_states = k_states
|
|
238
245
|
self.k_posdef = k_posdef
|
|
239
246
|
self.measurement_error = measurement_error
|
|
247
|
+
self.mode = mode
|
|
240
248
|
|
|
241
249
|
# All models contain a state space representation and a Kalman filter
|
|
242
250
|
self.ssm = PytensorRepresentation(k_endog, k_states, k_posdef)
|
|
@@ -819,10 +827,11 @@ class PyMCStateSpace:
|
|
|
819
827
|
self,
|
|
820
828
|
data: np.ndarray | pd.DataFrame | pt.TensorVariable,
|
|
821
829
|
register_data: bool = True,
|
|
822
|
-
mode: str | None = None,
|
|
823
830
|
missing_fill_value: float | None = None,
|
|
824
831
|
cov_jitter: float | None = JITTER_DEFAULT,
|
|
832
|
+
mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
|
|
825
833
|
save_kalman_filter_outputs_in_idata: bool = False,
|
|
834
|
+
mode: str | None = None,
|
|
826
835
|
) -> None:
|
|
827
836
|
"""
|
|
828
837
|
Given a parameter vector `theta`, constructs the full computational graph describing the state space model and
|
|
@@ -865,10 +874,36 @@ class PyMCStateSpace:
|
|
|
865
874
|
|
|
866
875
|
- The Univariate Filter is more robust than other filters, and can tolerate a lower jitter value
|
|
867
876
|
|
|
877
|
+
mvn_method: str, default "svd"
|
|
878
|
+
Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
|
|
879
|
+
(or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
|
|
880
|
+
to ill-conditioned matrices, while "svd" is slow but extremely robust.
|
|
881
|
+
|
|
882
|
+
In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
|
|
883
|
+
recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
|
|
884
|
+
|
|
868
885
|
save_kalman_filter_outputs_in_idata: bool, optional, default=False
|
|
869
886
|
If True, Kalman Filter outputs will be saved in the model as deterministics. Useful for debugging, but
|
|
870
887
|
should not be necessary for the majority of users.
|
|
888
|
+
|
|
889
|
+
mode: str, optional
|
|
890
|
+
Pytensor mode to use when compiling the graph. This will be saved as a model attribute and used when
|
|
891
|
+
compiling sampling functions (e.g. ``sample_conditional_prior``).
|
|
892
|
+
|
|
893
|
+
.. deprecated:: 0.2.5
|
|
894
|
+
The `mode` argument is deprecated and will be removed in a future version. Pass ``mode`` to the
|
|
895
|
+
model constructor, or manually specify ``compile_kwargs`` in sampling functions instead.
|
|
896
|
+
|
|
871
897
|
"""
|
|
898
|
+
if mode is not None:
|
|
899
|
+
warnings.warn(
|
|
900
|
+
"The `mode` argument is deprecated and will be removed in a future version. "
|
|
901
|
+
"Pass `mode` to the model constructor, or manually specify `compile_kwargs` in sampling functions"
|
|
902
|
+
" instead.",
|
|
903
|
+
DeprecationWarning,
|
|
904
|
+
)
|
|
905
|
+
self.mode = mode
|
|
906
|
+
|
|
872
907
|
pm_mod = modelcontext(None)
|
|
873
908
|
|
|
874
909
|
self._insert_random_variables()
|
|
@@ -889,7 +924,6 @@ class PyMCStateSpace:
|
|
|
889
924
|
filter_outputs = self.kalman_filter.build_graph(
|
|
890
925
|
pt.as_tensor_variable(data),
|
|
891
926
|
*self.unpack_statespace(),
|
|
892
|
-
mode=mode,
|
|
893
927
|
missing_fill_value=missing_fill_value,
|
|
894
928
|
cov_jitter=cov_jitter,
|
|
895
929
|
)
|
|
@@ -900,7 +934,7 @@ class PyMCStateSpace:
|
|
|
900
934
|
filtered_covariances, predicted_covariances, observed_covariances = covs
|
|
901
935
|
if save_kalman_filter_outputs_in_idata:
|
|
902
936
|
smooth_states, smooth_covariances = self._build_smoother_graph(
|
|
903
|
-
filtered_states, filtered_covariances, self.unpack_statespace()
|
|
937
|
+
filtered_states, filtered_covariances, self.unpack_statespace()
|
|
904
938
|
)
|
|
905
939
|
all_kf_outputs = [*states, smooth_states, *covs, smooth_covariances]
|
|
906
940
|
self._register_kalman_filter_outputs_with_pymc_model(all_kf_outputs)
|
|
@@ -915,11 +949,11 @@ class PyMCStateSpace:
|
|
|
915
949
|
logp=logp,
|
|
916
950
|
observed=data,
|
|
917
951
|
dims=obs_dims,
|
|
952
|
+
method=mvn_method,
|
|
918
953
|
)
|
|
919
954
|
|
|
920
955
|
self._fit_coords = pm_mod.coords.copy()
|
|
921
956
|
self._fit_dims = pm_mod.named_vars_to_dims.copy()
|
|
922
|
-
self._fit_mode = mode
|
|
923
957
|
|
|
924
958
|
def _build_smoother_graph(
|
|
925
959
|
self,
|
|
@@ -964,7 +998,7 @@ class PyMCStateSpace:
|
|
|
964
998
|
*_, T, Z, R, H, Q = matrices
|
|
965
999
|
|
|
966
1000
|
smooth_states, smooth_covariances = self.kalman_smoother.build_graph(
|
|
967
|
-
T, R, Q, filtered_states, filtered_covariances,
|
|
1001
|
+
T, R, Q, filtered_states, filtered_covariances, cov_jitter=cov_jitter
|
|
968
1002
|
)
|
|
969
1003
|
smooth_states.name = "smooth_states"
|
|
970
1004
|
smooth_covariances.name = "smooth_covariances"
|
|
@@ -1027,6 +1061,9 @@ class PyMCStateSpace:
|
|
|
1027
1061
|
provided when the model was built.
|
|
1028
1062
|
data_dims: str or tuple of str, optional
|
|
1029
1063
|
Dimension names associated with the model data. If None, defaults to ("time", "obs_state")
|
|
1064
|
+
scenario: dict[str, pd.DataFrame], optional
|
|
1065
|
+
Dictionary of out-of-sample scenario dataframes. If provided, it must have values for all data variables
|
|
1066
|
+
in the model. pm.set_data is used to replace training data with new values.
|
|
1030
1067
|
|
|
1031
1068
|
Returns
|
|
1032
1069
|
-------
|
|
@@ -1079,7 +1116,6 @@ class PyMCStateSpace:
|
|
|
1079
1116
|
R,
|
|
1080
1117
|
H,
|
|
1081
1118
|
Q,
|
|
1082
|
-
mode=self._fit_mode,
|
|
1083
1119
|
)
|
|
1084
1120
|
|
|
1085
1121
|
filter_outputs.pop(-1)
|
|
@@ -1089,7 +1125,7 @@ class PyMCStateSpace:
|
|
|
1089
1125
|
filtered_covariances, predicted_covariances, _ = covariances
|
|
1090
1126
|
|
|
1091
1127
|
[smoothed_states, smoothed_covariances] = self.kalman_smoother.build_graph(
|
|
1092
|
-
T, R, Q, filtered_states, filtered_covariances
|
|
1128
|
+
T, R, Q, filtered_states, filtered_covariances
|
|
1093
1129
|
)
|
|
1094
1130
|
|
|
1095
1131
|
grouped_outputs = [
|
|
@@ -1106,6 +1142,7 @@ class PyMCStateSpace:
|
|
|
1106
1142
|
group: str,
|
|
1107
1143
|
random_seed: RandomState | None = None,
|
|
1108
1144
|
data: pt.TensorLike | None = None,
|
|
1145
|
+
mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
|
|
1109
1146
|
**kwargs,
|
|
1110
1147
|
):
|
|
1111
1148
|
"""
|
|
@@ -1127,6 +1164,14 @@ class PyMCStateSpace:
|
|
|
1127
1164
|
Observed data on which to condition the model. If not provided, the function will use the data that was
|
|
1128
1165
|
provided when the model was built.
|
|
1129
1166
|
|
|
1167
|
+
mvn_method: str, default "svd"
|
|
1168
|
+
Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
|
|
1169
|
+
(or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
|
|
1170
|
+
to ill-conditioned matrices, while "svd" is slow but extremely robust.
|
|
1171
|
+
|
|
1172
|
+
In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
|
|
1173
|
+
recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
|
|
1174
|
+
|
|
1130
1175
|
kwargs:
|
|
1131
1176
|
Additional keyword arguments are passed to pymc.sample_posterior_predictive
|
|
1132
1177
|
|
|
@@ -1142,6 +1187,9 @@ class PyMCStateSpace:
|
|
|
1142
1187
|
_verify_group(group)
|
|
1143
1188
|
group_idata = getattr(idata, group)
|
|
1144
1189
|
|
|
1190
|
+
compile_kwargs = kwargs.pop("compile_kwargs", {})
|
|
1191
|
+
compile_kwargs.setdefault("mode", self.mode)
|
|
1192
|
+
|
|
1145
1193
|
with pm.Model(coords=self._fit_coords) as forward_model:
|
|
1146
1194
|
(
|
|
1147
1195
|
[
|
|
@@ -1178,6 +1226,7 @@ class PyMCStateSpace:
|
|
|
1178
1226
|
covs=cov,
|
|
1179
1227
|
logp=dummy_ll,
|
|
1180
1228
|
dims=state_dims,
|
|
1229
|
+
method=mvn_method,
|
|
1181
1230
|
)
|
|
1182
1231
|
|
|
1183
1232
|
obs_mu = (Z @ mu[..., None]).squeeze(-1)
|
|
@@ -1189,6 +1238,7 @@ class PyMCStateSpace:
|
|
|
1189
1238
|
covs=obs_cov,
|
|
1190
1239
|
logp=dummy_ll,
|
|
1191
1240
|
dims=obs_dims,
|
|
1241
|
+
method=mvn_method,
|
|
1192
1242
|
)
|
|
1193
1243
|
|
|
1194
1244
|
# TODO: Remove this after pm.Flat initial values are fixed
|
|
@@ -1205,8 +1255,8 @@ class PyMCStateSpace:
|
|
|
1205
1255
|
for name in FILTER_OUTPUT_TYPES
|
|
1206
1256
|
for suffix in ["", "_observed"]
|
|
1207
1257
|
],
|
|
1208
|
-
compile_kwargs={"mode": get_mode(self._fit_mode)},
|
|
1209
1258
|
random_seed=random_seed,
|
|
1259
|
+
compile_kwargs=compile_kwargs,
|
|
1210
1260
|
**kwargs,
|
|
1211
1261
|
)
|
|
1212
1262
|
|
|
@@ -1219,6 +1269,7 @@ class PyMCStateSpace:
|
|
|
1219
1269
|
steps: int | None = None,
|
|
1220
1270
|
use_data_time_dim: bool = False,
|
|
1221
1271
|
random_seed: RandomState | None = None,
|
|
1272
|
+
mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
|
|
1222
1273
|
**kwargs,
|
|
1223
1274
|
):
|
|
1224
1275
|
"""
|
|
@@ -1248,6 +1299,14 @@ class PyMCStateSpace:
|
|
|
1248
1299
|
random_seed : int, RandomState or Generator, optional
|
|
1249
1300
|
Seed for the random number generator.
|
|
1250
1301
|
|
|
1302
|
+
mvn_method: str, default "svd"
|
|
1303
|
+
Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
|
|
1304
|
+
(or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
|
|
1305
|
+
to ill-conditioned matrices, while "svd" is slow but extremely robust.
|
|
1306
|
+
|
|
1307
|
+
In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
|
|
1308
|
+
recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
|
|
1309
|
+
|
|
1251
1310
|
kwargs:
|
|
1252
1311
|
Additional keyword arguments are passed to pymc.sample_posterior_predictive
|
|
1253
1312
|
|
|
@@ -1263,6 +1322,10 @@ class PyMCStateSpace:
|
|
|
1263
1322
|
the latent state trajectories: `y[t] = Z @ x[t] + nu[t]`, where `nu ~ N(0, H)`.
|
|
1264
1323
|
"""
|
|
1265
1324
|
_verify_group(group)
|
|
1325
|
+
|
|
1326
|
+
compile_kwargs = kwargs.pop("compile_kwargs", {})
|
|
1327
|
+
compile_kwargs.setdefault("mode", self.mode)
|
|
1328
|
+
|
|
1266
1329
|
group_idata = getattr(idata, group)
|
|
1267
1330
|
dims = None
|
|
1268
1331
|
temp_coords = self._fit_coords.copy()
|
|
@@ -1305,7 +1368,7 @@ class PyMCStateSpace:
|
|
|
1305
1368
|
*matrices,
|
|
1306
1369
|
steps=steps,
|
|
1307
1370
|
dims=dims,
|
|
1308
|
-
|
|
1371
|
+
method=mvn_method,
|
|
1309
1372
|
sequence_names=self.kalman_filter.seq_names,
|
|
1310
1373
|
k_endog=self.k_endog,
|
|
1311
1374
|
)
|
|
@@ -1320,15 +1383,19 @@ class PyMCStateSpace:
|
|
|
1320
1383
|
idata_unconditional = pm.sample_posterior_predictive(
|
|
1321
1384
|
group_idata,
|
|
1322
1385
|
var_names=[f"{group}_latent", f"{group}_observed"],
|
|
1323
|
-
compile_kwargs={"mode": self._fit_mode},
|
|
1324
1386
|
random_seed=random_seed,
|
|
1387
|
+
compile_kwargs=compile_kwargs,
|
|
1325
1388
|
**kwargs,
|
|
1326
1389
|
)
|
|
1327
1390
|
|
|
1328
1391
|
return idata_unconditional.posterior_predictive
|
|
1329
1392
|
|
|
1330
1393
|
def sample_conditional_prior(
|
|
1331
|
-
self,
|
|
1394
|
+
self,
|
|
1395
|
+
idata: InferenceData,
|
|
1396
|
+
random_seed: RandomState | None = None,
|
|
1397
|
+
mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
|
|
1398
|
+
**kwargs,
|
|
1332
1399
|
) -> InferenceData:
|
|
1333
1400
|
"""
|
|
1334
1401
|
Sample from the conditional prior; that is, given parameter draws from the prior distribution,
|
|
@@ -1344,6 +1411,14 @@ class PyMCStateSpace:
|
|
|
1344
1411
|
random_seed : int, RandomState or Generator, optional
|
|
1345
1412
|
Seed for the random number generator.
|
|
1346
1413
|
|
|
1414
|
+
mvn_method: str, default "svd"
|
|
1415
|
+
Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
|
|
1416
|
+
(or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
|
|
1417
|
+
to ill-conditioned matrices, while "svd" is slow but extremely robust.
|
|
1418
|
+
|
|
1419
|
+
In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
|
|
1420
|
+
recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
|
|
1421
|
+
|
|
1347
1422
|
kwargs:
|
|
1348
1423
|
Additional keyword arguments are passed to pymc.sample_posterior_predictive
|
|
1349
1424
|
|
|
@@ -1355,10 +1430,16 @@ class PyMCStateSpace:
|
|
|
1355
1430
|
"predicted_prior", and "smoothed_prior".
|
|
1356
1431
|
"""
|
|
1357
1432
|
|
|
1358
|
-
return self._sample_conditional(
|
|
1433
|
+
return self._sample_conditional(
|
|
1434
|
+
idata=idata, group="prior", random_seed=random_seed, mvn_method=mvn_method, **kwargs
|
|
1435
|
+
)
|
|
1359
1436
|
|
|
1360
1437
|
def sample_conditional_posterior(
|
|
1361
|
-
self,
|
|
1438
|
+
self,
|
|
1439
|
+
idata: InferenceData,
|
|
1440
|
+
random_seed: RandomState | None = None,
|
|
1441
|
+
mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
|
|
1442
|
+
**kwargs,
|
|
1362
1443
|
):
|
|
1363
1444
|
"""
|
|
1364
1445
|
Sample from the conditional posterior; that is, given parameter draws from the posterior distribution,
|
|
@@ -1373,6 +1454,14 @@ class PyMCStateSpace:
|
|
|
1373
1454
|
random_seed : int, RandomState or Generator, optional
|
|
1374
1455
|
Seed for the random number generator.
|
|
1375
1456
|
|
|
1457
|
+
mvn_method: str, default "svd"
|
|
1458
|
+
Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
|
|
1459
|
+
(or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
|
|
1460
|
+
to ill-conditioned matrices, while "svd" is slow but extremely robust.
|
|
1461
|
+
|
|
1462
|
+
In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
|
|
1463
|
+
recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
|
|
1464
|
+
|
|
1376
1465
|
kwargs:
|
|
1377
1466
|
Additional keyword arguments are passed to pymc.sample_posterior_predictive
|
|
1378
1467
|
|
|
@@ -1384,7 +1473,9 @@ class PyMCStateSpace:
|
|
|
1384
1473
|
"predicted_posterior", and "smoothed_posterior".
|
|
1385
1474
|
"""
|
|
1386
1475
|
|
|
1387
|
-
return self._sample_conditional(
|
|
1476
|
+
return self._sample_conditional(
|
|
1477
|
+
idata=idata, group="posterior", random_seed=random_seed, mvn_method=mvn_method, **kwargs
|
|
1478
|
+
)
|
|
1388
1479
|
|
|
1389
1480
|
def sample_unconditional_prior(
|
|
1390
1481
|
self,
|
|
@@ -1392,6 +1483,7 @@ class PyMCStateSpace:
|
|
|
1392
1483
|
steps: int | None = None,
|
|
1393
1484
|
use_data_time_dim: bool = False,
|
|
1394
1485
|
random_seed: RandomState | None = None,
|
|
1486
|
+
mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
|
|
1395
1487
|
**kwargs,
|
|
1396
1488
|
) -> InferenceData:
|
|
1397
1489
|
"""
|
|
@@ -1420,6 +1512,14 @@ class PyMCStateSpace:
|
|
|
1420
1512
|
random_seed : int, RandomState or Generator, optional
|
|
1421
1513
|
Seed for the random number generator.
|
|
1422
1514
|
|
|
1515
|
+
mvn_method: str, default "svd"
|
|
1516
|
+
Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
|
|
1517
|
+
(or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
|
|
1518
|
+
to ill-conditioned matrices, while "svd" is slow but extremely robust.
|
|
1519
|
+
|
|
1520
|
+
In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
|
|
1521
|
+
recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
|
|
1522
|
+
|
|
1423
1523
|
kwargs:
|
|
1424
1524
|
Additional keyword arguments are passed to pymc.sample_posterior_predictive
|
|
1425
1525
|
|
|
@@ -1436,7 +1536,13 @@ class PyMCStateSpace:
|
|
|
1436
1536
|
"""
|
|
1437
1537
|
|
|
1438
1538
|
return self._sample_unconditional(
|
|
1439
|
-
idata,
|
|
1539
|
+
idata=idata,
|
|
1540
|
+
group="prior",
|
|
1541
|
+
steps=steps,
|
|
1542
|
+
use_data_time_dim=use_data_time_dim,
|
|
1543
|
+
random_seed=random_seed,
|
|
1544
|
+
mvn_method=mvn_method,
|
|
1545
|
+
**kwargs,
|
|
1440
1546
|
)
|
|
1441
1547
|
|
|
1442
1548
|
def sample_unconditional_posterior(
|
|
@@ -1445,6 +1551,7 @@ class PyMCStateSpace:
|
|
|
1445
1551
|
steps: int | None = None,
|
|
1446
1552
|
use_data_time_dim: bool = False,
|
|
1447
1553
|
random_seed: RandomState | None = None,
|
|
1554
|
+
mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
|
|
1448
1555
|
**kwargs,
|
|
1449
1556
|
) -> InferenceData:
|
|
1450
1557
|
"""
|
|
@@ -1474,6 +1581,14 @@ class PyMCStateSpace:
|
|
|
1474
1581
|
random_seed : int, RandomState or Generator, optional
|
|
1475
1582
|
Seed for the random number generator.
|
|
1476
1583
|
|
|
1584
|
+
mvn_method: str, default "svd"
|
|
1585
|
+
Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
|
|
1586
|
+
(or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
|
|
1587
|
+
to ill-conditioned matrices, while "svd" is slow but extremely robust.
|
|
1588
|
+
|
|
1589
|
+
In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
|
|
1590
|
+
recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
|
|
1591
|
+
|
|
1477
1592
|
Returns
|
|
1478
1593
|
-------
|
|
1479
1594
|
InferenceData
|
|
@@ -1487,11 +1602,17 @@ class PyMCStateSpace:
|
|
|
1487
1602
|
"""
|
|
1488
1603
|
|
|
1489
1604
|
return self._sample_unconditional(
|
|
1490
|
-
idata,
|
|
1605
|
+
idata=idata,
|
|
1606
|
+
group="posterior",
|
|
1607
|
+
steps=steps,
|
|
1608
|
+
use_data_time_dim=use_data_time_dim,
|
|
1609
|
+
random_seed=random_seed,
|
|
1610
|
+
mvn_method=mvn_method,
|
|
1611
|
+
**kwargs,
|
|
1491
1612
|
)
|
|
1492
1613
|
|
|
1493
1614
|
def sample_statespace_matrices(
|
|
1494
|
-
self, idata, matrix_names: str | list[str] | None, group: str = "posterior"
|
|
1615
|
+
self, idata, matrix_names: str | list[str] | None, group: str = "posterior", **kwargs
|
|
1495
1616
|
):
|
|
1496
1617
|
"""
|
|
1497
1618
|
Draw samples of requested statespace matrices from provided idata
|
|
@@ -1508,12 +1629,18 @@ class PyMCStateSpace:
|
|
|
1508
1629
|
group: str, one of "posterior" or "prior"
|
|
1509
1630
|
Whether to sample from priors or posteriors
|
|
1510
1631
|
|
|
1632
|
+
kwargs:
|
|
1633
|
+
Additional keyword arguments are passed to ``pymc.sample_posterior_predictive``
|
|
1634
|
+
|
|
1511
1635
|
Returns
|
|
1512
1636
|
-------
|
|
1513
1637
|
idata_matrices: az.InferenceData
|
|
1514
1638
|
"""
|
|
1515
1639
|
_verify_group(group)
|
|
1516
1640
|
|
|
1641
|
+
compile_kwargs = kwargs.pop("compile_kwargs", {})
|
|
1642
|
+
compile_kwargs.setdefault("mode", self.mode)
|
|
1643
|
+
|
|
1517
1644
|
if matrix_names is None:
|
|
1518
1645
|
matrix_names = MATRIX_NAMES
|
|
1519
1646
|
elif isinstance(matrix_names, str):
|
|
@@ -1544,8 +1671,9 @@ class PyMCStateSpace:
|
|
|
1544
1671
|
matrix_idata = pm.sample_posterior_predictive(
|
|
1545
1672
|
idata if group == "posterior" else idata.prior,
|
|
1546
1673
|
var_names=matrix_names,
|
|
1547
|
-
compile_kwargs={"mode": self._fit_mode},
|
|
1548
1674
|
extend_inferencedata=False,
|
|
1675
|
+
compile_kwargs=compile_kwargs,
|
|
1676
|
+
**kwargs,
|
|
1549
1677
|
)
|
|
1550
1678
|
|
|
1551
1679
|
return matrix_idata
|
|
@@ -1567,8 +1695,10 @@ class PyMCStateSpace:
|
|
|
1567
1695
|
raise ValueError(
|
|
1568
1696
|
"Integer start must be within the range of the data index used to fit the model."
|
|
1569
1697
|
)
|
|
1570
|
-
if periods is None and end is None:
|
|
1571
|
-
raise ValueError(
|
|
1698
|
+
if periods is None and end is None and not use_scenario_index:
|
|
1699
|
+
raise ValueError(
|
|
1700
|
+
"Must specify one of either periods or end unless use_scenario_index=True"
|
|
1701
|
+
)
|
|
1572
1702
|
if periods is not None and end is not None:
|
|
1573
1703
|
raise ValueError("Must specify exactly one of either periods or end")
|
|
1574
1704
|
if scenario is None and use_scenario_index:
|
|
@@ -1928,6 +2058,7 @@ class PyMCStateSpace:
|
|
|
1928
2058
|
filter_output="smoothed",
|
|
1929
2059
|
random_seed: RandomState | None = None,
|
|
1930
2060
|
verbose: bool = True,
|
|
2061
|
+
mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
|
|
1931
2062
|
**kwargs,
|
|
1932
2063
|
) -> InferenceData:
|
|
1933
2064
|
"""
|
|
@@ -1984,6 +2115,14 @@ class PyMCStateSpace:
|
|
|
1984
2115
|
verbose: bool, default=True
|
|
1985
2116
|
Whether to print diagnostic information about forecasting.
|
|
1986
2117
|
|
|
2118
|
+
mvn_method: str, default "svd"
|
|
2119
|
+
Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
|
|
2120
|
+
(or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
|
|
2121
|
+
to ill-conditioned matrices, while "svd" is slow but extremely robust.
|
|
2122
|
+
|
|
2123
|
+
In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
|
|
2124
|
+
recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
|
|
2125
|
+
|
|
1987
2126
|
kwargs:
|
|
1988
2127
|
Additional keyword arguments are passed to pymc.sample_posterior_predictive
|
|
1989
2128
|
|
|
@@ -2003,6 +2142,10 @@ class PyMCStateSpace:
|
|
|
2003
2142
|
filter_time_dim = TIME_DIM
|
|
2004
2143
|
|
|
2005
2144
|
_validate_filter_arg(filter_output)
|
|
2145
|
+
|
|
2146
|
+
compile_kwargs = kwargs.pop("compile_kwargs", {})
|
|
2147
|
+
compile_kwargs.setdefault("mode", self.mode)
|
|
2148
|
+
|
|
2006
2149
|
time_index = self._get_fit_time_index()
|
|
2007
2150
|
|
|
2008
2151
|
if start is None and verbose:
|
|
@@ -2060,9 +2203,18 @@ class PyMCStateSpace:
|
|
|
2060
2203
|
|
|
2061
2204
|
with pm.Model(coords=temp_coords) as forecast_model:
|
|
2062
2205
|
(_, _, *matrices), grouped_outputs = self._kalman_filter_outputs_from_dummy_graph(
|
|
2206
|
+
scenario=scenario,
|
|
2063
2207
|
data_dims=["data_time", OBS_STATE_DIM],
|
|
2064
2208
|
)
|
|
2065
2209
|
|
|
2210
|
+
for name in self.data_names:
|
|
2211
|
+
if name in scenario.keys():
|
|
2212
|
+
pm.set_data(
|
|
2213
|
+
{"data": np.zeros((len(forecast_index), self.k_endog))},
|
|
2214
|
+
coords={"data_time": np.arange(len(forecast_index))},
|
|
2215
|
+
)
|
|
2216
|
+
break
|
|
2217
|
+
|
|
2066
2218
|
group_idx = FILTER_OUTPUT_TYPES.index(filter_output)
|
|
2067
2219
|
mu, cov = grouped_outputs[group_idx]
|
|
2068
2220
|
|
|
@@ -2073,17 +2225,6 @@ class PyMCStateSpace:
|
|
|
2073
2225
|
"P0_slice", cov[t0_idx], dims=cov_dims[1:] if cov_dims is not None else None
|
|
2074
2226
|
)
|
|
2075
2227
|
|
|
2076
|
-
if scenario is not None:
|
|
2077
|
-
sub_dict = {
|
|
2078
|
-
forecast_model[data_name]: pt.as_tensor_variable(
|
|
2079
|
-
scenario.get(data_name), name=data_name
|
|
2080
|
-
)
|
|
2081
|
-
for data_name in self.data_names
|
|
2082
|
-
}
|
|
2083
|
-
|
|
2084
|
-
matrices = graph_replace(matrices, replace=sub_dict, strict=True)
|
|
2085
|
-
[setattr(matrix, "name", name) for name, matrix in zip(MATRIX_NAMES[2:], matrices)]
|
|
2086
|
-
|
|
2087
2228
|
_ = LinearGaussianStateSpace(
|
|
2088
2229
|
"forecast",
|
|
2089
2230
|
x0,
|
|
@@ -2091,10 +2232,10 @@ class PyMCStateSpace:
|
|
|
2091
2232
|
*matrices,
|
|
2092
2233
|
steps=len(forecast_index),
|
|
2093
2234
|
dims=dims,
|
|
2094
|
-
mode=self._fit_mode,
|
|
2095
2235
|
sequence_names=self.kalman_filter.seq_names,
|
|
2096
2236
|
k_endog=self.k_endog,
|
|
2097
2237
|
append_x0=False,
|
|
2238
|
+
method=mvn_method,
|
|
2098
2239
|
)
|
|
2099
2240
|
|
|
2100
2241
|
forecast_model.rvs_to_initial_values = {
|
|
@@ -2106,8 +2247,8 @@ class PyMCStateSpace:
|
|
|
2106
2247
|
idata_forecast = pm.sample_posterior_predictive(
|
|
2107
2248
|
idata,
|
|
2108
2249
|
var_names=["forecast_latent", "forecast_observed"],
|
|
2109
|
-
compile_kwargs={"mode": self._fit_mode},
|
|
2110
2250
|
random_seed=random_seed,
|
|
2251
|
+
compile_kwargs=compile_kwargs,
|
|
2111
2252
|
**kwargs,
|
|
2112
2253
|
)
|
|
2113
2254
|
|
|
@@ -2123,6 +2264,7 @@ class PyMCStateSpace:
|
|
|
2123
2264
|
shock_trajectory: np.ndarray | None = None,
|
|
2124
2265
|
orthogonalize_shocks: bool = False,
|
|
2125
2266
|
random_seed: RandomState | None = None,
|
|
2267
|
+
mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
|
|
2126
2268
|
**kwargs,
|
|
2127
2269
|
):
|
|
2128
2270
|
"""
|
|
@@ -2174,6 +2316,14 @@ class PyMCStateSpace:
|
|
|
2174
2316
|
random_seed : int, RandomState or Generator, optional
|
|
2175
2317
|
Seed for the random number generator.
|
|
2176
2318
|
|
|
2319
|
+
mvn_method: str, default "svd"
|
|
2320
|
+
Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
|
|
2321
|
+
(or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
|
|
2322
|
+
to ill-conditioned matrices, while "svd" is slow but extremely robust.
|
|
2323
|
+
|
|
2324
|
+
In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
|
|
2325
|
+
recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
|
|
2326
|
+
|
|
2177
2327
|
kwargs:
|
|
2178
2328
|
Additional keyword arguments are passed to pymc.sample_posterior_predictive
|
|
2179
2329
|
|
|
@@ -2186,6 +2336,9 @@ class PyMCStateSpace:
|
|
|
2186
2336
|
n_options = sum(x is not None for x in options)
|
|
2187
2337
|
Q = None # No covariance matrix needed if a trajectory is provided. Will be overwritten later if needed.
|
|
2188
2338
|
|
|
2339
|
+
compile_kwargs = kwargs.pop("compile_kwargs", {})
|
|
2340
|
+
compile_kwargs.setdefault("mode", self.mode)
|
|
2341
|
+
|
|
2189
2342
|
if n_options > 1:
|
|
2190
2343
|
raise ValueError("Specify exactly 0 or 1 of shock_size, shock_cov, or shock_trajectory")
|
|
2191
2344
|
elif n_options == 1:
|
|
@@ -2233,7 +2386,7 @@ class PyMCStateSpace:
|
|
|
2233
2386
|
shock_trajectory = pt.zeros((n_steps, self.k_posdef))
|
|
2234
2387
|
if Q is not None:
|
|
2235
2388
|
init_shock = pm.MvNormal(
|
|
2236
|
-
"initial_shock", mu=0, cov=Q, dims=[SHOCK_DIM], method=
|
|
2389
|
+
"initial_shock", mu=0, cov=Q, dims=[SHOCK_DIM], method=mvn_method
|
|
2237
2390
|
)
|
|
2238
2391
|
else:
|
|
2239
2392
|
init_shock = pm.Deterministic(
|
|
@@ -2257,29 +2410,15 @@ class PyMCStateSpace:
|
|
|
2257
2410
|
non_sequences=[c, T, R],
|
|
2258
2411
|
n_steps=n_steps,
|
|
2259
2412
|
strict=True,
|
|
2260
|
-
mode=self._fit_mode,
|
|
2261
2413
|
)
|
|
2262
2414
|
|
|
2263
2415
|
pm.Deterministic("irf", irf, dims=[TIME_DIM, ALL_STATE_DIM])
|
|
2264
2416
|
|
|
2265
|
-
compile_kwargs = kwargs.get("compile_kwargs", {})
|
|
2266
|
-
if "mode" not in compile_kwargs.keys():
|
|
2267
|
-
compile_kwargs = {"mode": self._fit_mode}
|
|
2268
|
-
else:
|
|
2269
|
-
mode = compile_kwargs.get("mode")
|
|
2270
|
-
if mode is not None and mode != self._fit_mode:
|
|
2271
|
-
raise ValueError(
|
|
2272
|
-
f"User provided compile mode ({mode}) does not match the compile mode used to "
|
|
2273
|
-
f"construct the model ({self._fit_mode})."
|
|
2274
|
-
)
|
|
2275
|
-
|
|
2276
|
-
compile_kwargs.update({"mode": self._fit_mode})
|
|
2277
|
-
|
|
2278
2417
|
irf_idata = pm.sample_posterior_predictive(
|
|
2279
2418
|
idata,
|
|
2280
2419
|
var_names=["irf"],
|
|
2281
|
-
compile_kwargs=compile_kwargs,
|
|
2282
2420
|
random_seed=random_seed,
|
|
2421
|
+
compile_kwargs=compile_kwargs,
|
|
2283
2422
|
**kwargs,
|
|
2284
2423
|
)
|
|
2285
2424
|
|