pymc-extras: pymc_extras-0.4.0-py3-none-any.whl → pymc_extras-0.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. pymc_extras/distributions/__init__.py +5 -5
  2. pymc_extras/distributions/histogram_utils.py +1 -1
  3. pymc_extras/inference/__init__.py +1 -1
  4. pymc_extras/printing.py +1 -1
  5. pymc_extras/statespace/__init__.py +4 -4
  6. pymc_extras/statespace/core/__init__.py +1 -1
  7. pymc_extras/statespace/core/representation.py +8 -8
  8. pymc_extras/statespace/core/statespace.py +94 -23
  9. pymc_extras/statespace/filters/__init__.py +3 -3
  10. pymc_extras/statespace/filters/kalman_filter.py +16 -11
  11. pymc_extras/statespace/models/SARIMAX.py +138 -74
  12. pymc_extras/statespace/models/VARMAX.py +248 -57
  13. pymc_extras/statespace/models/__init__.py +2 -2
  14. pymc_extras/statespace/models/structural/__init__.py +4 -4
  15. pymc_extras/statespace/models/structural/components/autoregressive.py +49 -24
  16. pymc_extras/statespace/models/structural/components/cycle.py +48 -28
  17. pymc_extras/statespace/models/structural/components/level_trend.py +61 -29
  18. pymc_extras/statespace/models/structural/components/measurement_error.py +22 -5
  19. pymc_extras/statespace/models/structural/components/regression.py +47 -18
  20. pymc_extras/statespace/models/structural/components/seasonality.py +278 -95
  21. pymc_extras/statespace/models/structural/core.py +27 -8
  22. pymc_extras/statespace/utils/constants.py +17 -14
  23. pymc_extras/statespace/utils/data_tools.py +1 -1
  24. {pymc_extras-0.4.0.dist-info → pymc_extras-0.4.1.dist-info}/METADATA +1 -1
  25. {pymc_extras-0.4.0.dist-info → pymc_extras-0.4.1.dist-info}/RECORD +27 -27
  26. {pymc_extras-0.4.0.dist-info → pymc_extras-0.4.1.dist-info}/WHEEL +0 -0
  27. {pymc_extras-0.4.0.dist-info → pymc_extras-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -17,11 +17,13 @@ from pymc_extras.statespace.utils.constants import (
17
17
  ALL_STATE_AUX_DIM,
18
18
  ALL_STATE_DIM,
19
19
  AR_PARAM_DIM,
20
+ EXOGENOUS_DIM,
20
21
  MA_PARAM_DIM,
21
22
  OBS_STATE_DIM,
22
23
  SARIMAX_STATE_STRUCTURES,
23
24
  SEASONAL_AR_PARAM_DIM,
24
25
  SEASONAL_MA_PARAM_DIM,
26
+ TIME_DIM,
25
27
  )
26
28
 
27
29
 
@@ -38,70 +40,16 @@ def _verify_order(p, d, q, P, D, Q, S):
38
40
  )
39
41
 
40
42
 
41
- class BayesianSARIMA(PyMCStateSpace):
43
+ class BayesianSARIMAX(PyMCStateSpace):
42
44
  r"""
43
- Seasonal AutoRegressive Integrated Moving Average with eXogenous regressors
45
+ Seasonal AutoRegressive Integrated Moving Average with eXogenous regressors.
44
46
 
45
- Parameters
46
- ----------
47
- order: tuple(int, int, int)
48
- Order of the ARIMA process. The order has the notation (p, d, q), where p is the number of autoregressive
49
- lags, q is the number of moving average components, and d is order of integration -- the number of
50
- differences needed to render the data stationary.
51
-
52
- If d > 0, the differences are modeled as components of the hidden state, and all available data can be used.
53
- This is only possible if state_structure = 'fast'. For interpretable states, the user must manually
54
- difference the data prior to calling the `build_statespace_graph` method.
55
-
56
- seasonal_order: tuple(int, int, int, int), optional
57
- Seasonal order of the SARIMA process. The order has the notation (P, D, Q, S), where P is the number of seasonal
58
- lags to include, Q is the number of seasonal innovation lags to include, and D is the number of seasonal
59
- differences to perform. S is the length of the season.
60
-
61
- Seasonal terms are similar to ARIMA terms, in that they are merely lags of the data or innovations. It is thus
62
- possible for the seasonal lags and the ARIMA lags to overlap, for example if P <= p. In this case, an error
63
- will be raised.
64
-
65
- stationary_initialization: bool, default False
66
- If true, the initial state and initial state covariance will not be assigned priors. Instead, their steady
67
- state values will be used.
68
-
69
- .. warning:: This option is very sensitive to the priors placed on the AR and MA parameters. If the model dynamics
70
- for a given sample are not stationary, sampling will fail with a "covariance is not positive semi-definite"
71
- error.
72
-
73
- filter_type: str, default "standard"
74
- The type of Kalman Filter to use. Options are "standard", "single", "univariate", "steady_state",
75
- and "cholesky". See the docs for kalman filters for more details.
76
-
77
- state_structure: str, default "fast"
78
- How to represent the state-space system. Currently, there are two choices: "fast" or "interpretable"
79
-
80
- - "fast" corresponds to the state space used by [2], and is called the "Harvey" representation in statsmodels.
81
- This is also the default representation used by statsmodels.tsa.statespace.SARIMAX. The states combine lags
82
- and innovations at different lags to compress the dimension of the state vector to max(p, 1+q). As a result,
83
- it is very preformat, but only the first state has a clear interpretation.
84
-
85
- - "interpretable" maximally expands the state vector, doing zero state compression. As a result, the state has
86
- dimension max(1, p) + max(1, q). What is gained by doing this is that every state has an obvious meaning, as
87
- either the data, an innovation, or a lag thereof.
88
-
89
- measurement_error: bool, default True
90
- If true, a measurement error term is added to the model.
91
-
92
- verbose: bool, default True
93
- If true, a message will be logged to the terminal explaining the variable names, dimensions, and supports.
94
-
95
- mode: str or Mode, optional
96
- Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and
97
- ``forecast``. The mode does **not** effect calls to ``pm.sample``.
98
-
99
- Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument
100
- to all sampling methods.
47
+ This class implements a Bayesian approach to SARIMAX models, which are used for modeling univariate time series data
48
+ with seasonal and non-seasonal components. The model supports exogenous regressors.
101
49
 
102
50
  Notes
103
51
  -----
104
- The ARIMAX model is a univariate time series model that posits the future evolution of a stationary time series will
52
+ The ARIMAX model is a univariate time series model that posits the future evolution of a stationary time series will
105
53
  be a function of its past values, together with exogenous "innovations" and their past history. The model is
106
54
  described by its "order", a 3-tuple (p, d, q), that are:
107
55
 
@@ -151,14 +99,14 @@ class BayesianSARIMA(PyMCStateSpace):
151
99
 
152
100
  Examples
153
101
  --------
154
- The following example shows how to build an ARMA(1, 1) model -- ARIMA(1, 0, 1) -- using the BayesianSARIMA class:
102
+ The following example shows how to build an ARMA(1, 1) model -- ARIMA(1, 0, 1) -- using the BayesianSARIMAX class:
155
103
 
156
104
  .. code:: python
157
105
 
158
106
  import pymc_extras.statespace as pmss
159
107
  import pymc as pm
160
108
 
161
- ss_mod = pmss.BayesianSARIMA(order=(1, 0, 1), verbose=True)
109
+ ss_mod = pmss.BayesianSARIMAX(order=(1, 0, 1), verbose=True)
162
110
 
163
111
  with pm.Model(coords=ss_mod.coords) as arma_model:
164
112
  state_sigmas = pm.HalfNormal("sigma_state", sigma=1.0, dims=ss_mod.param_dims["sigma_state"])
@@ -183,6 +131,8 @@ class BayesianSARIMA(PyMCStateSpace):
183
131
  self,
184
132
  order: tuple[int, int, int],
185
133
  seasonal_order: tuple[int, int, int, int] | None = None,
134
+ exog_state_names: list[str] | None = None,
135
+ k_exog: int | None = None,
186
136
  stationary_initialization: bool = True,
187
137
  filter_type: str = "standard",
188
138
  state_structure: str = "fast",
@@ -191,28 +141,104 @@ class BayesianSARIMA(PyMCStateSpace):
191
141
  mode: str | Mode | None = None,
192
142
  ):
193
143
  """
144
+ Initialize a BayesianSARIMAX model.
194
145
 
195
146
  Parameters
196
147
  ----------
197
- order
198
- seasonal_order
199
- stationary_initialization
200
- filter_type
201
- state_structure
202
- measurement_error
203
- verbose
204
- mode
148
+ order : tuple of int, int, int
149
+ Order of the ARIMA process. The order has the notation (p, d, q), where p is the number of autoregressive
150
+ lags, q is the number of moving average components, and d is order of integration -- the number of
151
+ differences needed to render the data stationary.
152
+
153
+ If d > 0, the differences are modeled as components of the hidden state, and all available data can be used.
154
+ This is only possible if state_structure = 'fast'. For interpretable states, the user must manually
155
+ difference the data prior to calling the `build_statespace_graph` method.
156
+
157
+ seasonal_order : tuple of int, int, int, int, optional
158
+ Seasonal order of the SARIMA process. The order has the notation (P, D, Q, S), where P is the number of seasonal
159
+ lags to include, Q is the number of seasonal innovation lags to include, and D is the number of seasonal
160
+ differences to perform. S is the length of the season.
161
+
162
+ Seasonal terms are similar to ARIMA terms, in that they are merely lags of the data or innovations. It is thus
163
+ possible for the seasonal lags and the ARIMA lags to overlap, for example if P <= p. In this case, an error
164
+ will be raised.
165
+
166
+ exog_state_names : list[str], optional
167
+ Names of the exogenous state variables.
168
+
169
+ k_exog : int, optional
170
+ Number of exogenous variables. If provided, must match the length of
171
+ `exog_state_names`.
172
+
173
+ stationary_initialization : bool, default True
174
+ If true, the initial state and initial state covariance will not be assigned priors. Instead, their steady
175
+ state values will be used.
176
+
177
+ .. warning:: This option is very sensitive to the priors placed on the AR and MA parameters. If the model dynamics
178
+ for a given sample are not stationary, sampling will fail with a "covariance is not positive semi-definite"
179
+ error.
180
+
181
+ filter_type : str, default "standard"
182
+ The type of Kalman Filter to use. Options are "standard", "single", "univariate", "steady_state",
183
+ and "cholesky". See the docs for kalman filters for more details.
184
+
185
+ state_structure : str, default "fast"
186
+ How to represent the state-space system. Currently, there are two choices: "fast" or "interpretable"
187
+
188
+ - "fast" corresponds to the state space used by [2], and is called the "Harvey" representation in statsmodels.
189
+ This is also the default representation used by statsmodels.tsa.statespace.SARIMAX. The states combine lags
190
+ and innovations at different lags to compress the dimension of the state vector to max(p, 1+q). As a result,
191
+ it is very performant, but only the first state has a clear interpretation.
192
+
193
+ - "interpretable" maximally expands the state vector, doing zero state compression. As a result, the state has
194
+ dimension max(1, p) + max(1, q). What is gained by doing this is that every state has an obvious meaning, as
195
+ either the data, an innovation, or a lag thereof.
196
+
197
+ measurement_error : bool, default False
198
+ If true, a measurement error term is added to the model.
199
+
200
+ verbose : bool, default True
201
+ If true, a message will be logged to the terminal explaining the variable names, dimensions, and supports.
202
+
203
+ mode : str or Mode, optional
204
+ Pytensor compile mode, used in auxiliary sampling methods such as ``sample_conditional_posterior`` and
205
+ ``forecast``. The mode does **not** affect calls to ``pm.sample``.
206
+
207
+ Regardless of whether a mode is specified, it can always be overwritten via the ``compile_kwargs`` argument
208
+ to all sampling methods.
205
209
  """
206
210
  # Model order
207
211
  self.p, self.d, self.q = order
208
212
  if seasonal_order is None:
209
213
  seasonal_order = (0, 0, 0, 0)
210
214
 
215
+ if exog_state_names is None and k_exog is not None:
216
+ exog_state_names = [f"exogenous_{i}" for i in range(k_exog)]
217
+ elif exog_state_names is not None and k_exog is None:
218
+ k_exog = len(exog_state_names)
219
+ elif exog_state_names is not None and k_exog is not None:
220
+ if len(exog_state_names) != k_exog:
221
+ raise ValueError(
222
+ f"Based on provided inputs, expected exog_state_names to have {k_exog} elements, but "
223
+ f"found {len(exog_state_names)}"
224
+ )
225
+ else:
226
+ k_exog = 0
227
+
228
+ self.exog_state_names = exog_state_names
229
+ self.k_exog = k_exog
230
+
211
231
  self.P, self.D, self.Q, self.S = seasonal_order
212
232
  _verify_order(self.p, self.d, self.q, self.P, self.D, self.Q, self.S)
213
233
 
214
234
  self.stationary_initialization = stationary_initialization
215
235
 
236
+ if (self.d or self.D) and self.stationary_initialization:
237
+ raise ValueError(
238
+ "Cannot use stationary initialization with differencing. "
239
+ "Set stationary_initialization=False."
240
+ )
241
+
216
242
  self.state_structure = state_structure
217
243
 
218
244
  self._p_max = max(1, self.p + self.P * self.S)
@@ -224,7 +250,7 @@ class BayesianSARIMA(PyMCStateSpace):
224
250
  if state_structure not in SARIMAX_STATE_STRUCTURES:
225
251
  raise ValueError(
226
252
  f"Got invalid argument {state_structure} for state structure, expected one of "
227
- f'{", ".join(SARIMAX_STATE_STRUCTURES)}'
253
+ f"{', '.join(SARIMAX_STATE_STRUCTURES)}"
228
254
  )
229
255
 
230
256
  if state_structure == "interpretable" and (self.d + self.D) > 0:
@@ -252,6 +278,7 @@ class BayesianSARIMA(PyMCStateSpace):
252
278
  measurement_error=measurement_error,
253
279
  mode=mode,
254
280
  )
281
+ self._needs_exog_data = self.k_exog > 0
255
282
 
256
283
  @property
257
284
  def param_names(self):
@@ -262,6 +289,7 @@ class BayesianSARIMA(PyMCStateSpace):
262
289
  "ma_params",
263
290
  "seasonal_ar_params",
264
291
  "seasonal_ma_params",
292
+ "beta_exog",
265
293
  "sigma_state",
266
294
  "sigma_obs",
267
295
  ]
@@ -276,11 +304,24 @@ class BayesianSARIMA(PyMCStateSpace):
276
304
  names.remove("ma_params")
277
305
  if self.Q == 0:
278
306
  names.remove("seasonal_ma_params")
307
+ if self.k_exog == 0:
308
+ names.remove("beta_exog")
279
309
  if not self.measurement_error:
280
310
  names.remove("sigma_obs")
281
311
 
282
312
  return names
283
313
 
314
+ @property
315
+ def data_info(self) -> dict[str, dict[str, Any]]:
316
+ info = {
317
+ "exogenous_data": {
318
+ "dims": (TIME_DIM, EXOGENOUS_DIM),
319
+ "shape": (None, self.k_exog),
320
+ }
321
+ }
322
+
323
+ return {name: info[name] for name in self.data_names}
324
+
284
325
  @property
285
326
  def param_info(self) -> dict[str, dict[str, Any]]:
286
327
  info = {
@@ -293,11 +334,11 @@ class BayesianSARIMA(PyMCStateSpace):
293
334
  "constraints": "Positive Semi-definite",
294
335
  },
295
336
  "sigma_obs": {
296
- "shape": None if self.k_endog == 1 else (self.k_endog,),
337
+ "shape": () if self.k_endog == 1 else (self.k_endog,),
297
338
  "constraints": "Positive",
298
339
  },
299
340
  "sigma_state": {
300
- "shape": None if self.k_posdef == 1 else (self.k_posdef,),
341
+ "shape": () if self.k_posdef == 1 else (self.k_posdef,),
301
342
  "constraints": "Positive",
302
343
  },
303
344
  "ar_params": {
@@ -310,6 +351,7 @@ class BayesianSARIMA(PyMCStateSpace):
310
351
  },
311
352
  "seasonal_ar_params": {"shape": (self.P,), "constraints": "None"},
312
353
  "seasonal_ma_params": {"shape": (self.Q,), "constraints": "None"},
354
+ "beta_exog": {"shape": (self.k_exog,), "constraints": "None"},
313
355
  }
314
356
 
315
357
  for name in self.param_names:
@@ -336,6 +378,12 @@ class BayesianSARIMA(PyMCStateSpace):
336
378
 
337
379
  return states
338
380
 
381
+ @property
382
+ def data_names(self) -> list[str]:
383
+ if self.k_exog > 0:
384
+ return ["exogenous_data"]
385
+ return []
386
+
339
387
  @property
340
388
  def observed_states(self):
341
389
  return [self.state_names[0]]
@@ -355,6 +403,7 @@ class BayesianSARIMA(PyMCStateSpace):
355
403
  "ma_params": (MA_PARAM_DIM,),
356
404
  "seasonal_ar_params": (SEASONAL_AR_PARAM_DIM,),
357
405
  "seasonal_ma_params": (SEASONAL_MA_PARAM_DIM,),
406
+ "beta_exog": (EXOGENOUS_DIM,),
358
407
  }
359
408
  if self.k_endog == 1:
360
409
  coord_map["sigma_state"] = None
@@ -369,6 +418,8 @@ class BayesianSARIMA(PyMCStateSpace):
369
418
  del coord_map["seasonal_ar_params"]
370
419
  if self.Q == 0:
371
420
  del coord_map["seasonal_ma_params"]
421
+ if self.k_exog == 0:
422
+ del coord_map["beta_exog"]
372
423
  if self.stationary_initialization:
373
424
  del coord_map["P0"]
374
425
  del coord_map["x0"]
@@ -386,7 +437,8 @@ class BayesianSARIMA(PyMCStateSpace):
386
437
  coords.update({SEASONAL_AR_PARAM_DIM: list(range(1, self.P + 1))})
387
438
  if self.Q > 0:
388
439
  coords.update({SEASONAL_MA_PARAM_DIM: list(range(1, self.Q + 1))})
389
-
440
+ if self.k_exog > 0:
441
+ coords.update({EXOGENOUS_DIM: self.exog_state_names})
390
442
  return coords
391
443
 
392
444
  def _stationary_initialization(self):
@@ -396,7 +448,7 @@ class BayesianSARIMA(PyMCStateSpace):
396
448
  Q = self.ssm["state_cov"]
397
449
  c = self.ssm["state_intercept"]
398
450
 
399
- x0 = pt.linalg.solve(pt.identity_like(T) - T, c, assume_a="gen", check_finite=True)
451
+ x0 = pt.linalg.solve(pt.identity_like(T) - T, c, assume_a="gen", check_finite=False)
400
452
  P0 = solve_discrete_lyapunov(T, pt.linalg.matrix_dot(R, Q, R.T), method="bilinear")
401
453
 
402
454
  return x0, P0
@@ -535,6 +587,18 @@ class BayesianSARIMA(PyMCStateSpace):
535
587
  ma_params, Q
536
588
  )
537
589
 
590
+ # If exogenous regressors are present, register them as data and include a regression term
591
+ # in the observation intercept
592
+ if self.k_exog > 0:
593
+ exog_data = self.make_and_register_data(
594
+ "exogenous_data", shape=(None, self.k_exog), dtype=floatX
595
+ )
596
+ exog_beta = self.make_and_register_variable(
597
+ "beta_exog", shape=(self.k_exog,), dtype=floatX
598
+ )
599
+
600
+ self.ssm["obs_intercept"] = (exog_data @ exog_beta)[:, None]
601
+
538
602
  # Set up the state covariance matrix
539
603
  state_cov_idx = ("state_cov", *np.diag_indices(self.k_posdef))
540
604
  state_cov = self.make_and_register_variable(