pymc-extras 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. pymc_extras/__init__.py +5 -1
  2. pymc_extras/distributions/continuous.py +3 -2
  3. pymc_extras/distributions/discrete.py +3 -1
  4. pymc_extras/inference/find_map.py +62 -17
  5. pymc_extras/inference/laplace.py +10 -7
  6. pymc_extras/statespace/core/statespace.py +191 -52
  7. pymc_extras/statespace/filters/distributions.py +15 -16
  8. pymc_extras/statespace/filters/kalman_filter.py +1 -18
  9. pymc_extras/statespace/filters/kalman_smoother.py +2 -6
  10. pymc_extras/statespace/models/ETS.py +10 -0
  11. pymc_extras/statespace/models/SARIMAX.py +26 -5
  12. pymc_extras/statespace/models/VARMAX.py +12 -2
  13. pymc_extras/statespace/models/structural.py +18 -5
  14. pymc_extras-0.2.6.dist-info/METADATA +318 -0
  15. pymc_extras-0.2.6.dist-info/RECORD +65 -0
  16. {pymc_extras-0.2.5.dist-info → pymc_extras-0.2.6.dist-info}/WHEEL +1 -2
  17. pymc_extras/version.py +0 -11
  18. pymc_extras/version.txt +0 -1
  19. pymc_extras-0.2.5.dist-info/METADATA +0 -112
  20. pymc_extras-0.2.5.dist-info/RECORD +0 -108
  21. pymc_extras-0.2.5.dist-info/top_level.txt +0 -2
  22. tests/__init__.py +0 -13
  23. tests/distributions/__init__.py +0 -19
  24. tests/distributions/test_continuous.py +0 -185
  25. tests/distributions/test_discrete.py +0 -210
  26. tests/distributions/test_discrete_markov_chain.py +0 -258
  27. tests/distributions/test_multivariate.py +0 -304
  28. tests/distributions/test_transform.py +0 -77
  29. tests/model/__init__.py +0 -0
  30. tests/model/marginal/__init__.py +0 -0
  31. tests/model/marginal/test_distributions.py +0 -132
  32. tests/model/marginal/test_graph_analysis.py +0 -182
  33. tests/model/marginal/test_marginal_model.py +0 -967
  34. tests/model/test_model_api.py +0 -38
  35. tests/statespace/__init__.py +0 -0
  36. tests/statespace/test_ETS.py +0 -411
  37. tests/statespace/test_SARIMAX.py +0 -405
  38. tests/statespace/test_VARMAX.py +0 -184
  39. tests/statespace/test_coord_assignment.py +0 -181
  40. tests/statespace/test_distributions.py +0 -270
  41. tests/statespace/test_kalman_filter.py +0 -326
  42. tests/statespace/test_representation.py +0 -175
  43. tests/statespace/test_statespace.py +0 -872
  44. tests/statespace/test_statespace_JAX.py +0 -156
  45. tests/statespace/test_structural.py +0 -836
  46. tests/statespace/utilities/__init__.py +0 -0
  47. tests/statespace/utilities/shared_fixtures.py +0 -9
  48. tests/statespace/utilities/statsmodel_local_level.py +0 -42
  49. tests/statespace/utilities/test_helpers.py +0 -310
  50. tests/test_blackjax_smc.py +0 -222
  51. tests/test_find_map.py +0 -103
  52. tests/test_histogram_approximation.py +0 -109
  53. tests/test_laplace.py +0 -281
  54. tests/test_linearmodel.py +0 -208
  55. tests/test_model_builder.py +0 -306
  56. tests/test_pathfinder.py +0 -297
  57. tests/test_pivoted_cholesky.py +0 -24
  58. tests/test_printing.py +0 -98
  59. tests/test_prior_from_trace.py +0 -172
  60. tests/test_splines.py +0 -77
  61. tests/utils.py +0 -0
  62. {pymc_extras-0.2.5.dist-info → pymc_extras-0.2.6.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,8 @@
  import logging
+ import warnings

  from collections.abc import Callable, Sequence
- from typing import Any
+ from typing import Any, Literal

  import numpy as np
  import pandas as pd
@@ -14,7 +15,6 @@ from pymc.model import modelcontext
  from pymc.model.transform.optimization import freeze_dims_and_data
  from pymc.util import RandomState
  from pytensor import Variable, graph_replace
- from pytensor.compile import get_mode
  from rich.box import SIMPLE_HEAD
  from rich.console import Console
  from rich.table import Table
@@ -99,6 +99,13 @@ class PyMCStateSpace:
  compute the observation errors. If False, these errors are deterministically zero; if True, they are sampled
  from a multivariate normal.

+ mode: str or Mode, optional
+ Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and
+ ``forecast``. The mode does **not** effect calls to ``pm.sample``.
+
+ Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument
+ to all sampling methods.
+
  Notes
  -----
  Based on the statsmodels statespace implementation https://github.com/statsmodels/statsmodels/blob/main/statsmodels/tsa/statespace/representation.py,
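
Note on the new constructor-level ``mode`` documented above (this sketch is not part of the diff): the intended workflow, assuming a concrete subclass such as BayesianSARIMA forwards ``mode`` to this constructor.

    from pymc_extras.statespace import BayesianSARIMA

    # `mode` passthrough by the subclass constructor is an assumption here; this diff
    # only adds the argument to PyMCStateSpace.__init__ itself.
    ss_mod = BayesianSARIMA(order=(1, 0, 1), verbose=False, mode="NUMBA")
    print(ss_mod.mode)  # "NUMBA" -- stored on the instance as the default compile mode

    # Auxiliary sampling methods (sample_conditional_posterior, forecast, ...) pick up
    # this default, but an explicit compile_kwargs entry still takes precedence:
    # ss_mod.sample_conditional_posterior(idata, compile_kwargs={"mode": "JAX"})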
@@ -221,8 +228,8 @@
  filter_type: str = "standard",
  verbose: bool = True,
  measurement_error: bool = False,
+ mode: str | None = None,
  ):
- self._fit_mode: str | None = None
  self._fit_coords: dict[str, Sequence[str]] | None = None
  self._fit_dims: dict[str, Sequence[str]] | None = None
  self._fit_data: pt.TensorVariable | None = None
@@ -237,6 +244,7 @@
  self.k_states = k_states
  self.k_posdef = k_posdef
  self.measurement_error = measurement_error
+ self.mode = mode

  # All models contain a state space representation and a Kalman filter
  self.ssm = PytensorRepresentation(k_endog, k_states, k_posdef)
@@ -819,10 +827,11 @@
  self,
  data: np.ndarray | pd.DataFrame | pt.TensorVariable,
  register_data: bool = True,
- mode: str | None = None,
  missing_fill_value: float | None = None,
  cov_jitter: float | None = JITTER_DEFAULT,
+ mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
  save_kalman_filter_outputs_in_idata: bool = False,
+ mode: str | None = None,
  ) -> None:
  """
  Given a parameter vector `theta`, constructs the full computational graph describing the state space model and
@@ -865,10 +874,36 @@

  - The Univariate Filter is more robust than other filters, and can tolerate a lower jitter value

+ mvn_method: str, default "svd"
+ Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+ (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+ to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+ In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+ recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
  save_kalman_filter_outputs_in_idata: bool, optional, default=False
  If True, Kalman Filter outputs will be saved in the model as deterministics. Useful for debugging, but
  should not be necessary for the majority of users.
+
+ mode: str, optional
+ Pytensor mode to use when compiling the graph. This will be saved as a model attribute and used when
+ compiling sampling functions (e.g. ``sample_conditional_prior``).
+
+ .. deprecated:: 0.2.5
+ The `mode` argument is deprecated and will be removed in a future version. Pass ``mode`` to the
+ model constructor, or manually specify ``compile_kwargs`` in sampling functions instead.
+
  """
+ if mode is not None:
+ warnings.warn(
+ "The `mode` argument is deprecated and will be removed in a future version. "
+ "Pass `mode` to the model constructor, or manually specify `compile_kwargs` in sampling functions"
+ " instead.",
+ DeprecationWarning,
+ )
+ self.mode = mode
+
  pm_mod = modelcontext(None)

  self._insert_random_variables()
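
The ``mvn_method`` guidance repeated throughout these docstrings ("cholesky" is fast but fragile, "svd" is slow but robust) can be checked with a small self-contained numpy example; it is illustrative only and not part of the diff.

    import numpy as np

    # A singular covariance of the kind that arises when a model has no measurement error.
    cov = np.array([[1.0, 1.0],
                    [1.0, 1.0]])

    # "cholesky": fastest, but fails outright once the covariance is not positive definite.
    try:
        np.linalg.cholesky(cov)
    except np.linalg.LinAlgError as err:
        print("cholesky failed:", err)

    # "svd": slower, but still produces a usable factor A with A @ A.T ~= cov.
    u, s, _ = np.linalg.svd(cov)
    A = u * np.sqrt(s)
    print("svd reconstruction error:", np.abs(A @ A.T - cov).max())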
@@ -889,7 +924,6 @@
  filter_outputs = self.kalman_filter.build_graph(
  pt.as_tensor_variable(data),
  *self.unpack_statespace(),
- mode=mode,
  missing_fill_value=missing_fill_value,
  cov_jitter=cov_jitter,
  )
@@ -900,7 +934,7 @@
  filtered_covariances, predicted_covariances, observed_covariances = covs
  if save_kalman_filter_outputs_in_idata:
  smooth_states, smooth_covariances = self._build_smoother_graph(
- filtered_states, filtered_covariances, self.unpack_statespace(), mode=mode
+ filtered_states, filtered_covariances, self.unpack_statespace()
  )
  all_kf_outputs = [*states, smooth_states, *covs, smooth_covariances]
  self._register_kalman_filter_outputs_with_pymc_model(all_kf_outputs)
@@ -915,11 +949,11 @@
  logp=logp,
  observed=data,
  dims=obs_dims,
+ method=mvn_method,
  )

  self._fit_coords = pm_mod.coords.copy()
  self._fit_dims = pm_mod.named_vars_to_dims.copy()
- self._fit_mode = mode

  def _build_smoother_graph(
  self,
@@ -964,7 +998,7 @@
  *_, T, Z, R, H, Q = matrices

  smooth_states, smooth_covariances = self.kalman_smoother.build_graph(
- T, R, Q, filtered_states, filtered_covariances, mode=mode, cov_jitter=cov_jitter
+ T, R, Q, filtered_states, filtered_covariances, cov_jitter=cov_jitter
  )
  smooth_states.name = "smooth_states"
  smooth_covariances.name = "smooth_covariances"
@@ -1027,6 +1061,9 @@
  provided when the model was built.
  data_dims: str or tuple of str, optional
  Dimension names associated with the model data. If None, defaults to ("time", "obs_state")
+ scenario: dict[str, pd.DataFrame], optional
+ Dictionary of out-of-sample scenario dataframes. If provided, it must have values for all data variables
+ in the model. pm.set_data is used to replace training data with new values.

  Returns
  -------
@@ -1079,7 +1116,6 @@
  R,
  H,
  Q,
- mode=self._fit_mode,
  )

  filter_outputs.pop(-1)
@@ -1089,7 +1125,7 @@
  filtered_covariances, predicted_covariances, _ = covariances

  [smoothed_states, smoothed_covariances] = self.kalman_smoother.build_graph(
- T, R, Q, filtered_states, filtered_covariances, mode=self._fit_mode
+ T, R, Q, filtered_states, filtered_covariances
  )

  grouped_outputs = [
@@ -1106,6 +1142,7 @@
  group: str,
  random_seed: RandomState | None = None,
  data: pt.TensorLike | None = None,
+ mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
  **kwargs,
  ):
  """
@@ -1127,6 +1164,14 @@
  Observed data on which to condition the model. If not provided, the function will use the data that was
  provided when the model was built.

+ mvn_method: str, default "svd"
+ Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+ (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+ to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+ In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+ recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
  kwargs:
  Additional keyword arguments are passed to pymc.sample_posterior_predictive

@@ -1142,6 +1187,9 @@
  _verify_group(group)
  group_idata = getattr(idata, group)

+ compile_kwargs = kwargs.pop("compile_kwargs", {})
+ compile_kwargs.setdefault("mode", self.mode)
+
  with pm.Model(coords=self._fit_coords) as forward_model:
  (
  [
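
The ``compile_kwargs = kwargs.pop(...)`` / ``setdefault("mode", self.mode)`` pattern introduced in the hunk above (and repeated in the other sampling helpers below) gives "default from the model, override from the caller" behaviour. A standalone illustration of that pattern, using an illustrative helper name rather than anything from the package:

    # `self_mode` stands in for the model's stored `self.mode`; merge_compile_kwargs is
    # an illustrative helper, not a function in pymc-extras.
    def merge_compile_kwargs(self_mode, **kwargs):
        compile_kwargs = kwargs.pop("compile_kwargs", {})
        compile_kwargs.setdefault("mode", self_mode)  # fill in "mode" only if the caller omitted it
        return compile_kwargs, kwargs

    print(merge_compile_kwargs("NUMBA"))                                  # ({'mode': 'NUMBA'}, {})
    print(merge_compile_kwargs("NUMBA", compile_kwargs={"mode": "JAX"}))  # ({'mode': 'JAX'}, {})
    print(merge_compile_kwargs("NUMBA", predictions=True))                # ({'mode': 'NUMBA'}, {'predictions': True})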
@@ -1178,6 +1226,7 @@
  covs=cov,
  logp=dummy_ll,
  dims=state_dims,
+ method=mvn_method,
  )

  obs_mu = (Z @ mu[..., None]).squeeze(-1)
@@ -1189,6 +1238,7 @@
  covs=obs_cov,
  logp=dummy_ll,
  dims=obs_dims,
+ method=mvn_method,
  )

  # TODO: Remove this after pm.Flat initial values are fixed
@@ -1205,8 +1255,8 @@
  for name in FILTER_OUTPUT_TYPES
  for suffix in ["", "_observed"]
  ],
- compile_kwargs={"mode": get_mode(self._fit_mode)},
  random_seed=random_seed,
+ compile_kwargs=compile_kwargs,
  **kwargs,
  )

@@ -1219,6 +1269,7 @@
  steps: int | None = None,
  use_data_time_dim: bool = False,
  random_seed: RandomState | None = None,
+ mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
  **kwargs,
  ):
  """
@@ -1248,6 +1299,14 @@
  random_seed : int, RandomState or Generator, optional
  Seed for the random number generator.

+ mvn_method: str, default "svd"
+ Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+ (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+ to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+ In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+ recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
  kwargs:
  Additional keyword arguments are passed to pymc.sample_posterior_predictive

@@ -1263,6 +1322,10 @@
  the latent state trajectories: `y[t] = Z @ x[t] + nu[t]`, where `nu ~ N(0, H)`.
  """
  _verify_group(group)
+
+ compile_kwargs = kwargs.pop("compile_kwargs", {})
+ compile_kwargs.setdefault("mode", self.mode)
+
  group_idata = getattr(idata, group)
  dims = None
  temp_coords = self._fit_coords.copy()
@@ -1305,7 +1368,7 @@
  *matrices,
  steps=steps,
  dims=dims,
- mode=self._fit_mode,
+ method=mvn_method,
  sequence_names=self.kalman_filter.seq_names,
  k_endog=self.k_endog,
  )
@@ -1320,15 +1383,19 @@
  idata_unconditional = pm.sample_posterior_predictive(
  group_idata,
  var_names=[f"{group}_latent", f"{group}_observed"],
- compile_kwargs={"mode": self._fit_mode},
  random_seed=random_seed,
+ compile_kwargs=compile_kwargs,
  **kwargs,
  )

  return idata_unconditional.posterior_predictive

  def sample_conditional_prior(
- self, idata: InferenceData, random_seed: RandomState | None = None, **kwargs
+ self,
+ idata: InferenceData,
+ random_seed: RandomState | None = None,
+ mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
+ **kwargs,
  ) -> InferenceData:
  """
  Sample from the conditional prior; that is, given parameter draws from the prior distribution,
@@ -1344,6 +1411,14 @@
  random_seed : int, RandomState or Generator, optional
  Seed for the random number generator.

+ mvn_method: str, default "svd"
+ Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+ (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+ to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+ In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+ recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
  kwargs:
  Additional keyword arguments are passed to pymc.sample_posterior_predictive

@@ -1355,10 +1430,16 @@
  "predicted_prior", and "smoothed_prior".
  """

- return self._sample_conditional(idata, "prior", random_seed, **kwargs)
+ return self._sample_conditional(
+ idata=idata, group="prior", random_seed=random_seed, mvn_method=mvn_method, **kwargs
+ )

  def sample_conditional_posterior(
- self, idata: InferenceData, random_seed: RandomState | None = None, **kwargs
+ self,
+ idata: InferenceData,
+ random_seed: RandomState | None = None,
+ mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
+ **kwargs,
  ):
  """
  Sample from the conditional posterior; that is, given parameter draws from the posterior distribution,
@@ -1373,6 +1454,14 @@
  random_seed : int, RandomState or Generator, optional
  Seed for the random number generator.

+ mvn_method: str, default "svd"
+ Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+ (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+ to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+ In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+ recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
  kwargs:
  Additional keyword arguments are passed to pymc.sample_posterior_predictive

@@ -1384,7 +1473,9 @@
  "predicted_posterior", and "smoothed_posterior".
  """

- return self._sample_conditional(idata, "posterior", random_seed, **kwargs)
+ return self._sample_conditional(
+ idata=idata, group="posterior", random_seed=random_seed, mvn_method=mvn_method, **kwargs
+ )

  def sample_unconditional_prior(
  self,
@@ -1392,6 +1483,7 @@
  steps: int | None = None,
  use_data_time_dim: bool = False,
  random_seed: RandomState | None = None,
+ mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
  **kwargs,
  ) -> InferenceData:
  """
@@ -1420,6 +1512,14 @@
  random_seed : int, RandomState or Generator, optional
  Seed for the random number generator.

+ mvn_method: str, default "svd"
+ Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+ (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+ to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+ In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+ recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
  kwargs:
  Additional keyword arguments are passed to pymc.sample_posterior_predictive

@@ -1436,7 +1536,13 @@
  """

  return self._sample_unconditional(
- idata, "prior", steps, use_data_time_dim, random_seed, **kwargs
+ idata=idata,
+ group="prior",
+ steps=steps,
+ use_data_time_dim=use_data_time_dim,
+ random_seed=random_seed,
+ mvn_method=mvn_method,
+ **kwargs,
  )

  def sample_unconditional_posterior(
@@ -1445,6 +1551,7 @@
  steps: int | None = None,
  use_data_time_dim: bool = False,
  random_seed: RandomState | None = None,
+ mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
  **kwargs,
  ) -> InferenceData:
  """
@@ -1474,6 +1581,14 @@
  random_seed : int, RandomState or Generator, optional
  Seed for the random number generator.

+ mvn_method: str, default "svd"
+ Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+ (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+ to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+ In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+ recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
  Returns
  -------
  InferenceData
@@ -1487,11 +1602,17 @@
  """

  return self._sample_unconditional(
- idata, "posterior", steps, use_data_time_dim, random_seed, **kwargs
+ idata=idata,
+ group="posterior",
+ steps=steps,
+ use_data_time_dim=use_data_time_dim,
+ random_seed=random_seed,
+ mvn_method=mvn_method,
+ **kwargs,
  )

  def sample_statespace_matrices(
- self, idata, matrix_names: str | list[str] | None, group: str = "posterior"
+ self, idata, matrix_names: str | list[str] | None, group: str = "posterior", **kwargs
  ):
  """
  Draw samples of requested statespace matrices from provided idata
@@ -1508,12 +1629,18 @@
  group: str, one of "posterior" or "prior"
  Whether to sample from priors or posteriors

+ kwargs:
+ Additional keyword arguments are passed to ``pymc.sample_posterior_predictive``
+
  Returns
  -------
  idata_matrices: az.InterenceData
  """
  _verify_group(group)

+ compile_kwargs = kwargs.pop("compile_kwargs", {})
+ compile_kwargs.setdefault("mode", self.mode)
+
  if matrix_names is None:
  matrix_names = MATRIX_NAMES
  elif isinstance(matrix_names, str):
@@ -1544,8 +1671,9 @@
  matrix_idata = pm.sample_posterior_predictive(
  idata if group == "posterior" else idata.prior,
  var_names=matrix_names,
- compile_kwargs={"mode": self._fit_mode},
  extend_inferencedata=False,
+ compile_kwargs=compile_kwargs,
+ **kwargs,
  )

  return matrix_idata
@@ -1567,8 +1695,10 @@
  raise ValueError(
  "Integer start must be within the range of the data index used to fit the model."
  )
- if periods is None and end is None:
- raise ValueError("Must specify one of either periods or end")
+ if periods is None and end is None and not use_scenario_index:
+ raise ValueError(
+ "Must specify one of either periods or end unless use_scenario_index=True"
+ )
  if periods is not None and end is not None:
  raise ValueError("Must specify exactly one of either periods or end")
  if scenario is None and use_scenario_index:
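
With the relaxed check above, ``periods``/``end`` may be omitted whenever ``use_scenario_index=True``, since the forecast horizon can be read from the scenario's index. A hedged sketch of that call, not taken from the diff: ``ss_mod`` and ``idata`` are assumed to be an already-fitted statespace model and its trace, and the data-variable name "exog_data" is illustrative.

    import numpy as np
    import pandas as pd

    # Out-of-sample values for a hypothetical exogenous regressor registered as "exog_data".
    scenario = {
        "exog_data": pd.DataFrame(
            {"x1": np.linspace(0.0, 1.0, 12)},
            index=pd.date_range("2025-01-01", periods=12, freq="D"),
        )
    }

    # No periods= or end= needed: the 12-step horizon comes from the scenario index.
    forecasts = ss_mod.forecast(
        idata,
        scenario=scenario,
        use_scenario_index=True,
        mvn_method="svd",
    )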
@@ -1928,6 +2058,7 @@
  filter_output="smoothed",
  random_seed: RandomState | None = None,
  verbose: bool = True,
+ mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
  **kwargs,
  ) -> InferenceData:
  """
@@ -1984,6 +2115,14 @@
  verbose: bool, default=True
  Whether to print diagnostic information about forecasting.

+ mvn_method: str, default "svd"
+ Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+ (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+ to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+ In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+ recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
  kwargs:
  Additional keyword arguments are passed to pymc.sample_posterior_predictive

@@ -2003,6 +2142,10 @@
  filter_time_dim = TIME_DIM

  _validate_filter_arg(filter_output)
+
+ compile_kwargs = kwargs.pop("compile_kwargs", {})
+ compile_kwargs.setdefault("mode", self.mode)
+
  time_index = self._get_fit_time_index()

  if start is None and verbose:
@@ -2060,9 +2203,18 @@

  with pm.Model(coords=temp_coords) as forecast_model:
  (_, _, *matrices), grouped_outputs = self._kalman_filter_outputs_from_dummy_graph(
+ scenario=scenario,
  data_dims=["data_time", OBS_STATE_DIM],
  )

+ for name in self.data_names:
+ if name in scenario.keys():
+ pm.set_data(
+ {"data": np.zeros((len(forecast_index), self.k_endog))},
+ coords={"data_time": np.arange(len(forecast_index))},
+ )
+ break
+
  group_idx = FILTER_OUTPUT_TYPES.index(filter_output)
  mu, cov = grouped_outputs[group_idx]

@@ -2073,17 +2225,6 @@
  "P0_slice", cov[t0_idx], dims=cov_dims[1:] if cov_dims is not None else None
  )

- if scenario is not None:
- sub_dict = {
- forecast_model[data_name]: pt.as_tensor_variable(
- scenario.get(data_name), name=data_name
- )
- for data_name in self.data_names
- }
-
- matrices = graph_replace(matrices, replace=sub_dict, strict=True)
- [setattr(matrix, "name", name) for name, matrix in zip(MATRIX_NAMES[2:], matrices)]
-
  _ = LinearGaussianStateSpace(
  "forecast",
  x0,
@@ -2091,10 +2232,10 @@
  *matrices,
  steps=len(forecast_index),
  dims=dims,
- mode=self._fit_mode,
  sequence_names=self.kalman_filter.seq_names,
  k_endog=self.k_endog,
  append_x0=False,
+ method=mvn_method,
  )

  forecast_model.rvs_to_initial_values = {
@@ -2106,8 +2247,8 @@
  idata_forecast = pm.sample_posterior_predictive(
  idata,
  var_names=["forecast_latent", "forecast_observed"],
- compile_kwargs={"mode": self._fit_mode},
  random_seed=random_seed,
+ compile_kwargs=compile_kwargs,
  **kwargs,
  )

@@ -2123,6 +2264,7 @@
  shock_trajectory: np.ndarray | None = None,
  orthogonalize_shocks: bool = False,
  random_seed: RandomState | None = None,
+ mvn_method: Literal["cholesky", "eigh", "svd"] = "svd",
  **kwargs,
  ):
  """
@@ -2174,6 +2316,14 @@
  random_seed : int, RandomState or Generator, optional
  Seed for the random number generator.

+ mvn_method: str, default "svd"
+ Method used to invert the covariance matrix when calculating the pdf of a multivariate normal
+ (or when generating samples). One of "cholesky", "eigh", or "svd". "cholesky" is fastest, but least robust
+ to ill-conditioned matrices, while "svd" is slow but extremely robust.
+
+ In general, if your model has measurement error, "cholesky" will be safe to use. Otherwise, "svd" is
+ recommended. "eigh" can also be tried if sampling with "svd" is very slow, but it is not as robust as "svd".
+
  kwargs:
  Additional keyword arguments are passed to pymc.sample_posterior_predictive

@@ -2186,6 +2336,9 @@
  n_options = sum(x is not None for x in options)
  Q = None # No covariance matrix needed if a trajectory is provided. Will be overwritten later if needed.

+ compile_kwargs = kwargs.pop("compile_kwargs", {})
+ compile_kwargs.setdefault("mode", self.mode)
+
  if n_options > 1:
  raise ValueError("Specify exactly 0 or 1 of shock_size, shock_cov, or shock_trajectory")
  elif n_options == 1:
@@ -2233,7 +2386,7 @@
  shock_trajectory = pt.zeros((n_steps, self.k_posdef))
  if Q is not None:
  init_shock = pm.MvNormal(
- "initial_shock", mu=0, cov=Q, dims=[SHOCK_DIM], method="svd"
+ "initial_shock", mu=0, cov=Q, dims=[SHOCK_DIM], method=mvn_method
  )
  else:
  init_shock = pm.Deterministic(
@@ -2257,29 +2410,15 @@
  non_sequences=[c, T, R],
  n_steps=n_steps,
  strict=True,
- mode=self._fit_mode,
  )

  pm.Deterministic("irf", irf, dims=[TIME_DIM, ALL_STATE_DIM])

- compile_kwargs = kwargs.get("compile_kwargs", {})
- if "mode" not in compile_kwargs.keys():
- compile_kwargs = {"mode": self._fit_mode}
- else:
- mode = compile_kwargs.get("mode")
- if mode is not None and mode != self._fit_mode:
- raise ValueError(
- f"User provided compile mode ({mode}) does not match the compile mode used to "
- f"construct the model ({self._fit_mode})."
- )
-
- compile_kwargs.update({"mode": self._fit_mode})
-
  irf_idata = pm.sample_posterior_predictive(
  idata,
  var_names=["irf"],
- compile_kwargs=compile_kwargs,
  random_seed=random_seed,
+ compile_kwargs=compile_kwargs,
  **kwargs,
  )
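
The last hunk drops the old consistency check between a user-supplied compile mode and the mode used at fit time; ``impulse_response_function`` now merges ``compile_kwargs`` with the model-level default like every other sampling helper. A hedged call sketch, not taken from the diff (``ss_mod`` and ``idata`` are again assumed to exist, and the argument values are illustrative):

    irf = ss_mod.impulse_response_function(
        idata,
        n_steps=40,                      # horizon of the impulse responses
        shock_size=1.0,                  # exactly one of shock_size / shock_cov / shock_trajectory
        mvn_method="cholesky",           # only used when the initial shock is drawn from a covariance
        compile_kwargs={"mode": "JAX"},  # overrides ss_mod.mode; no longer raises if the two differ
    )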