pymc-extras 0.3.1__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. pymc_extras/distributions/__init__.py +5 -5
  2. pymc_extras/distributions/histogram_utils.py +1 -1
  3. pymc_extras/inference/__init__.py +1 -1
  4. pymc_extras/inference/laplace_approx/find_map.py +12 -5
  5. pymc_extras/inference/laplace_approx/idata.py +4 -3
  6. pymc_extras/inference/laplace_approx/laplace.py +6 -4
  7. pymc_extras/inference/pathfinder/pathfinder.py +1 -2
  8. pymc_extras/printing.py +1 -1
  9. pymc_extras/statespace/__init__.py +4 -4
  10. pymc_extras/statespace/core/__init__.py +1 -1
  11. pymc_extras/statespace/core/representation.py +8 -8
  12. pymc_extras/statespace/core/statespace.py +94 -23
  13. pymc_extras/statespace/filters/__init__.py +3 -3
  14. pymc_extras/statespace/filters/kalman_filter.py +16 -11
  15. pymc_extras/statespace/models/SARIMAX.py +138 -74
  16. pymc_extras/statespace/models/VARMAX.py +248 -57
  17. pymc_extras/statespace/models/__init__.py +2 -2
  18. pymc_extras/statespace/models/structural/__init__.py +21 -0
  19. pymc_extras/statespace/models/structural/components/__init__.py +0 -0
  20. pymc_extras/statespace/models/structural/components/autoregressive.py +213 -0
  21. pymc_extras/statespace/models/structural/components/cycle.py +325 -0
  22. pymc_extras/statespace/models/structural/components/level_trend.py +289 -0
  23. pymc_extras/statespace/models/structural/components/measurement_error.py +154 -0
  24. pymc_extras/statespace/models/structural/components/regression.py +257 -0
  25. pymc_extras/statespace/models/structural/components/seasonality.py +628 -0
  26. pymc_extras/statespace/models/structural/core.py +919 -0
  27. pymc_extras/statespace/models/structural/utils.py +16 -0
  28. pymc_extras/statespace/models/utilities.py +285 -0
  29. pymc_extras/statespace/utils/constants.py +21 -18
  30. pymc_extras/statespace/utils/data_tools.py +4 -3
  31. {pymc_extras-0.3.1.dist-info → pymc_extras-0.4.1.dist-info}/METADATA +5 -4
  32. {pymc_extras-0.3.1.dist-info → pymc_extras-0.4.1.dist-info}/RECORD +34 -25
  33. pymc_extras/statespace/models/structural.py +0 -1679
  34. {pymc_extras-0.3.1.dist-info → pymc_extras-0.4.1.dist-info}/WHEEL +0 -0
  35. {pymc_extras-0.3.1.dist-info → pymc_extras-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,628 @@
1
+ import numpy as np
2
+
3
+ from pytensor import tensor as pt
4
+
5
+ from pymc_extras.statespace.models.structural.core import Component
6
+ from pymc_extras.statespace.models.structural.utils import _frequency_transition_block
7
+
8
+
9
+ class TimeSeasonality(Component):
10
+ r"""
11
+ Seasonal component, modeled in the time domain
12
+
13
+ Parameters
14
+ ----------
15
+ season_length: int
16
+ The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for
17
+ daily data with weekly seasonal pattern, etc. It must be greater than one.
18
+
19
+ duration: int, default 1
20
+ Number of time steps for each seasonal period.
21
+ This determines how long each seasonal period is held constant before moving to the next.
22
+
23
+ innovations: bool, default True
24
+ Whether to include stochastic innovations in the strength of the seasonal effect
25
+
26
+ name: str, default None
27
+ A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal
28
+ components are included in the same model. Default is ``f"Seasonal[s={season_length}, d={duration}]"``
29
+
30
+ state_names: list of str, default None
31
+ List of strings for seasonal effect labels. If provided, it must be of length ``season_length`` times ``duration``.
32
+ An example would be ``state_names = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun']`` when data is daily with a weekly
33
+ seasonal pattern (``season_length = 7``).
34
+
35
+ If None and ``duration = 1``, states will be named as ``[{name}_0, ..., {name}_{s-1}]`` (here s is ``season_length``).
36
+ If None and ``duration > 1``, states will be named as ``[{name}_0_0, ..., {name}_{s-1}_{d-1}]`` (here d is ``duration``).
37
+
38
+ remove_first_state: bool, default True
39
+ If True, the first state will be removed from the model. This is done because there are only ``season_length-1`` degrees of
40
+ freedom in the seasonal component, and one state is not identified. If False, the first state will be
41
+ included in the model, but it will not be identified -- you will need to handle this in the priors (e.g. with
42
+ ZeroSumNormal).
43
+
44
+ observed_state_names: list[str] | None, default None
45
+ List of strings for observed state labels. If None, defaults to ["data"].
46
+
47
+ share_states: bool, default False
48
+ Whether latent states are shared across the observed states. If True, there will be only one set of latent
49
+ states, which are observed by all observed states. If False, each observed state has its own set of
50
+ latent states. This argument has no effect if `k_endog` is 1.
51
+
52
+ Notes
53
+ -----
54
+ A seasonal effect is any pattern that repeats at fixed intervals. There are several ways to model such effects;
55
+ here, we present two models that are straightforward extensions of those described in [1].
56
+
57
+ **First model** (``remove_first_state=True``)
58
+
59
+ In this model, the state vector is defined as:
60
+
61
+ .. math::
62
+ \alpha_t :=(\gamma_t, \ldots, \gamma_{t-d(s-1)+1}), \quad t \ge 0.
63
+
64
+ This vector has length :math:`d(s-1)`, where:
65
+
66
+ - :math:`s` is the ``season_length`` parameter, and
67
+ - :math:`d` is the ``duration`` parameter.
68
+
69
+ The components of the initial vector :math:`\alpha_{0}` are given by
70
+
71
+ .. math::
72
+ \gamma_{-l} := \tilde{\gamma}_{k_l}, \quad \text{where} \quad k_l := \left\lfloor \frac{l}{d} \right\rfloor \bmod s \quad \text{and} \quad l=0,\ldots, d(s-1)-1.
73
+
74
+ Here, the values
75
+
76
+ .. math::
77
+ \tilde{\gamma}_{0}, \ldots, \tilde{\gamma}_{s-2},
78
+
79
+ represent the initial seasonal states. The transition matrix of this model is the :math:`d(s-1) \times d(s-1)` matrix
80
+
81
+ .. math::
82
+ \begin{bmatrix}
83
+ -\mathbf{1}_d & -\mathbf{1}_d & \cdots & -\mathbf{1}_d & -\mathbf{1}_d \\
84
+ \mathbf{1}_d & \mathbf{0}_d & \cdots & \mathbf{0}_d & \mathbf{0}_d \\
85
+ \mathbf{0}_d & \mathbf{1}_d & \cdots & \mathbf{0}_d & \mathbf{0}_d \\
86
+ \vdots & \vdots & \ddots & \vdots \\
87
+ \mathbf{0}_d & \mathbf{0}_d & \cdots & \mathbf{1}_d & \mathbf{0}_d
88
+ \end{bmatrix}
89
+
90
+ where :math:`\mathbf{1}_d` and :math:`\mathbf{0}_d` denote the :math:`d \times d` identity and null matrices, respectively.
91
+
92
+ **Second model** (``remove_first_state=False``)
93
+
94
+ In contrast, the state vector in the second model is defined as:
95
+
96
+ .. math::
97
+ \alpha_t=(\gamma_t, \ldots, \gamma_{t-ds+1}), \quad t \ge 0.
98
+
99
+ This vector has length :math:`ds`. The components of the initial state vector :math:`\alpha_{0}` are defined similarly:
100
+
101
+ .. math::
102
+ \gamma_{-l} := \tilde{\gamma}_{k_l}, \quad \text{where} \quad k_l := \left\lfloor \frac{l}{d} \right\rfloor \bmod s \quad \text{and} \quad l=0,\ldots, ds-1.
103
+
104
+ In this case, the initial seasonal states :math:`\tilde{\gamma}_{0}, \ldots, \tilde{\gamma}_{s-1}` are required to satisfy the following condition:
105
+
106
+ .. math::
107
+ \sum_{i=0}^{s-1} \tilde{\gamma}_{i} = 0.
108
+
109
+ The transition matrix of this model is the following :math:`ds \times ds` circulant matrix:
110
+
111
+ .. math::
112
+ \begin{bmatrix}
113
+ 0 & 1 & 0 & \cdots & 0 \\
114
+ 0 & 0 & 1 & \cdots & 0 \\
115
+ \vdots & \vdots & \ddots & \ddots & \vdots \\
116
+ 0 & 0 & \cdots & 0 & 1 \\
117
+ 1 & 0 & \cdots & 0 & 0
118
+ \end{bmatrix}
119
+
120
+ To give interpretation to the :math:`\gamma` terms, it is helpful to work through the algebra for a simple
121
+ example. Let :math:`s=4`, :math:`d=1`, ``remove_first_state=True``, and omit the shock term. Then, we have
122
+ :math:`\gamma_{-i} = \tilde{\gamma}_{i}`, for :math:`i=0,\ldots, 2`, and the value of the seasonal component
123
+ for the first 5 timesteps will be:
124
+
125
+ .. math::
126
+ \begin{align}
127
+ \gamma_1 &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\
128
+ \gamma_2 &= -\gamma_1 - \gamma_0 - \gamma_{-1} \\
129
+ &= -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 - \gamma_{-1} \\
130
+ &= (\gamma_0 - \gamma_0 )+ (\gamma_{-1} - \gamma_{-1}) + \gamma_{-2} \\
131
+ &= \gamma_{-2} \\
132
+ \gamma_3 &= -\gamma_2 - \gamma_1 - \gamma_0 \\
133
+ &= -\gamma_{-2} - (-\gamma_0 - \gamma_{-1} - \gamma_{-2}) - \gamma_0 \\
134
+ &= (\gamma_{-2} - \gamma_{-2}) + \gamma_{-1} + (\gamma_0 - \gamma_0) \\
135
+ &= \gamma_{-1} \\
136
+ \gamma_4 &= -\gamma_3 - \gamma_2 - \gamma_1 \\
137
+ &= -\gamma_{-1} - \gamma_{-2} -(-\gamma_0 - \gamma_{-1} - \gamma_{-2}) \\
138
+ &= (\gamma_{-2} - \gamma_{-2}) + (\gamma_{-1} - \gamma_{-1}) + \gamma_0 \\
139
+ &= \gamma_0 \\
140
+ \gamma_5 &= -\gamma_4 - \gamma_3 - \gamma_2 \\
141
+ &= -\gamma_0 - \gamma_{-1} - \gamma_{-2} \\
142
+ &= \gamma_1
143
+ \end{align}
144
+
145
+ This exercise shows that, given a list ``initial_conditions`` of length ``s-1``, the effects of this model will be:
146
+
147
+ - Period 1: ``-sum(initial_conditions)``
148
+ - Period 2: ``initial_conditions[-1]``
149
+ - Period 3: ``initial_conditions[-2]``
150
+ - ...
151
+ - Period s: ``initial_conditions[0]``
152
+ - Period s+1: ``-sum(initial_conditions)``
153
+
154
+ And so on. So for interpretation, the ``season_length - 1`` initial states are, when reversed, the coefficients
155
+ associated with ``state_names[1:]``.
156
+
157
+ In the next example, we set :math:`s=3`, :math:`d=2`, ``remove_first_state=True``, and omit the shock term.
158
+ By definition, the initial vector :math:`\alpha_{0}` is
159
+
160
+ .. math::
161
+ \alpha_0=(\tilde{\gamma}_{0}, \tilde{\gamma}_{0}, \tilde{\gamma}_{-1}, \tilde{\gamma}_{-1})
162
+
163
+ and the transition matrix is
164
+
165
+ .. math::
166
+ \begin{bmatrix}
167
+ -1 & 0 & -1 & 0 \\
168
+ 0 & -1 & 0 & -1 \\
169
+ 1 & 0 & 0 & 0 \\
170
+ 0 & 1 & 0 & 0 \\
171
+ \end{bmatrix}
172
+
173
+ It is easy to verify that:
174
+
175
+ .. math::
176
+ \begin{align}
177
+ \gamma_1 &= -\tilde{\gamma}_0 - \tilde{\gamma}_{-1}\\
178
+ \gamma_2 &= -(-\tilde{\gamma}_0 - \tilde{\gamma}_{-1})-\tilde{\gamma}_0\\
179
+ &= \tilde{\gamma}_{-1}\\
180
+ \gamma_3 &= -\tilde{\gamma}_{-1} +(\tilde{\gamma}_0 + \tilde{\gamma}_{-1})\\
181
+ &= \tilde{\gamma}_{0}\\
182
+ \gamma_4 &= -\tilde{\gamma}_0 - \tilde{\gamma}_{-1}.\\
183
+ \end{align}
184
+
185
+ .. warning::
186
+ Although the ``state_names`` argument expects a list of length ``season_length`` times ``duration``,
187
+ only ``state_names[duration:]`` will be saved as model dimensions, since the first coefficient is not identified
188
+ (it is defined as :math:`-\sum_{i=1}^{s-1} \tilde{\gamma}_{-i}`).
189
+
190
+ Examples
191
+ --------
192
+ Estimate a model with a Gaussian random walk trend and monthly seasonality:
193
+
194
+ .. code:: python
195
+
196
+ from pymc_extras.statespace import structural as st
197
+ import pymc as pm
198
+ import pytensor.tensor as pt
199
+ import pandas as pd
200
+
201
+ # Get month names
202
+ state_names = pd.date_range('1900-01-01', '1900-12-31', freq='MS').month_name().tolist()
203
+
204
+ # Build the structural model
205
+ grw = st.LevelTrendComponent(order=1, innovations_order=1)
206
+ annual_season = st.TimeSeasonality(
207
+ season_length=12, name="annual", state_names=state_names, innovations=False
208
+ )
209
+ ss_mod = (grw + annual_season).build()
210
+
211
+ with pm.Model(coords=ss_mod.coords) as model:
212
+ P0 = pm.Deterministic('P0', pt.eye(ss_mod.k_states) * 10, dims=ss_mod.param_dims['P0'])
213
+
214
+ initial_level_trend = pm.Deterministic(
215
+ "initial_level_trend", pt.zeros(1), dims=ss_mod.param_dims["initial_level_trend"]
216
+ )
217
+ sigma_level_trend = pm.HalfNormal(
218
+ "sigma_level_trend", sigma=1e-6, dims=ss_mod.param_dims["sigma_level_trend"]
219
+ )
220
+ params_annual = pm.Normal("params_annual", sigma=1e-2, dims=ss_mod.param_dims["params_annual"])
221
+
222
+ ss_mod.build_statespace_graph(data)
223
+ idata = pm.sample(
224
+ nuts_sampler="nutpie", nuts_sampler_kwargs={"backend": "JAX", "gradient_backend": "JAX"}
225
+ )
226
+
227
+ References
228
+ ----------
229
+ .. [1] Durbin, James, and Siem Jan Koopman. 2012.
230
+ Time Series Analysis by State Space Methods: Second Edition.
231
+ Oxford University Press.
232
+ """
233
+
234
def __init__(
    self,
    season_length: int,
    duration: int = 1,
    innovations: bool = True,
    name: str | None = None,
    state_names: list | None = None,
    remove_first_state: bool = True,
    observed_state_names: list[str] | None = None,
    share_states: bool = False,
):
    """
    Validate the arguments, derive state labels and sizes, and initialise the base component.

    Raises
    ------
    ValueError
        If ``season_length`` is not an integer greater than 1, if ``duration`` is not a positive integer, or
        if ``state_names`` is provided with a length other than ``season_length * duration``.
    """
    if observed_state_names is None:
        observed_state_names = ["data"]

    # Check the type *before* comparing: comparing e.g. a string with an int raises TypeError, which would
    # hide the ValueError this validation is meant to produce. NumPy integers are accepted as well.
    if not isinstance(season_length, (int, np.integer)) or season_length <= 1:
        raise ValueError(
            f"season_length must be an integer greater than 1, got {season_length}"
        )
    if not isinstance(duration, (int, np.integer)) or duration <= 0:
        raise ValueError(f"duration must be a positive integer, got {duration}")

    # Normalise NumPy integers to plain Python ints for the arithmetic and ``range`` calls that follow.
    season_length = int(season_length)
    duration = int(duration)

    if name is None:
        name = f"Seasonal[s={season_length}, d={duration}]"

    if state_names is None:
        if duration > 1:
            state_names = [
                f"{name}_{i}_{j}" for i in range(season_length) for j in range(duration)
            ]
        else:
            state_names = [f"{name}_{i}" for i in range(season_length)]
    else:
        if len(state_names) != season_length * duration:
            raise ValueError(
                f"state_names must be a list of length season_length*duration, got {len(state_names)}"
            )
        # Copy so the caller's sequence is never aliased; ``list`` also accepts tuples.
        state_names = list(state_names)

    self.share_states = share_states
    self.innovations = innovations
    self.duration = duration
    self.remove_first_state = remove_first_state
    self.season_length = season_length

    if self.remove_first_state:
        # In traditional models, the first state isn't identified, so we can help out the user by automatically
        # discarding it (all ``duration`` labels belonging to the first season are dropped).
        # TODO: Can this be stashed and reconstructed automatically somehow?
        state_names = state_names[duration:]

    self.provided_state_names = state_names

    # Sizes for a single set of latent states; multiplied by k_endog below unless states are shared.
    k_states = (season_length - int(self.remove_first_state)) * duration
    k_endog = len(observed_state_names)
    k_posdef = int(innovations)

    super().__init__(
        name=name,
        k_endog=k_endog,
        k_states=k_states if share_states else k_states * k_endog,
        k_posdef=k_posdef if share_states else k_posdef * k_endog,
        observed_state_names=observed_state_names,
        measurement_error=False,
        combine_hidden_states=True,
        # Only the first state of each set is flagged (1.0); the rest are 0.0.
        obs_state_idxs=np.tile(
            np.array([1.0] + [0.0] * (k_states - 1)), 1 if share_states else k_endog
        ),
        share_states=share_states,
    )
301
+
302
def populate_component_properties(self):
    """
    Populate state names, parameter names/shapes/dims, coordinates and shock names.

    All shape and dimension decisions are keyed on the *effective* number of observed series, which is 1
    when ``share_states`` is True and ``k_endog`` otherwise, so that ``param_info``, ``param_dims`` and
    ``coords`` always agree with each other.
    """
    k_endog = self.k_endog
    k_endog_effective = 1 if self.share_states else k_endog
    # Number of states belonging to one set of latent states.
    k_states = self.k_states // k_endog_effective
    multi_endog = k_endog_effective > 1

    if self.share_states:
        self.state_names = [
            f"{state_name}[{self.name}_shared]" for state_name in self.provided_state_names
        ]
    else:
        self.state_names = [
            f"{state_name}[{endog_name}]"
            for endog_name in self.observed_state_names
            for state_name in self.provided_state_names
        ]

    params_name = f"params_{self.name}"
    endog_dim = f"endog_{self.name}"
    state_dim = f"state_{self.name}"
    params_dims = (endog_dim, state_dim) if multi_endog else (state_dim,)

    self.param_names = [params_name]

    # NOTE(review): make_symbolic_graph registers this variable with ``k_states // duration`` entries per
    # series, while the shape and state coordinate declared here have ``k_states`` entries. These differ
    # when ``duration > 1`` -- confirm which is intended.
    self.param_info = {
        params_name: {
            # Use the effective count here: with shared states there is a single parameter vector even
            # when several series are observed, matching ``dims`` below.
            "shape": (k_endog_effective, k_states) if multi_endog else (k_states,),
            "constraints": None,
            "dims": params_dims,
        }
    }

    self.param_dims = {params_name: params_dims}

    if multi_endog:
        self.coords = {
            endog_dim: self.observed_state_names,
            state_dim: self.provided_state_names,
        }
    else:
        self.coords = {state_dim: self.provided_state_names}

    if self.innovations:
        sigma_name = f"sigma_{self.name}"
        self.param_names += [sigma_name]
        self.param_info[sigma_name] = {
            "shape": (k_endog_effective,) if multi_endog else (),
            "constraints": "Positive",
            "dims": (endog_dim,) if multi_endog else None,
        }
        if self.share_states:
            self.shock_names = [f"{self.name}[shared]"]
        else:
            self.shock_names = [f"{self.name}[{name}]" for name in self.observed_state_names]

        # Only register an endog dimension for sigma when that dimension exists in ``coords``; with shared
        # states sigma is a scalar and has no dims.
        if multi_endog:
            self.param_dims[sigma_name] = (endog_dim,)
360
+
361
def make_symbolic_graph(self) -> None:
    """
    Write the transition, design, initial-state, selection and state-covariance entries into ``self.ssm``.

    One transition/design block is built for a single set of latent states and then repeated
    block-diagonally once per effective observed series (1 when states are shared, ``k_endog`` otherwise).
    """
    n_endog = 1 if self.share_states else self.k_endog
    d = self.duration
    states_per_endog = self.k_states // n_endog
    n_seasons = states_per_endog // d
    shocks_per_endog = self.k_posdef // n_endog

    if self.remove_first_state:
        # Parameters are normalized to sum to zero, so the current state is the negative sum of all
        # previous states: the first block-row is all -I_d, the remaining block-rows shift states down.
        null_d = pt.zeros((d, d))
        eye_d = pt.eye(d)
        n_blocks = self.season_length - 1

        block_rows = [pt.concatenate([-eye_d for _ in range(n_blocks)], axis=1)]
        for row_idx in range(n_blocks - 1):
            blocks = [eye_d if col_idx == row_idx else null_d for col_idx in range(n_blocks)]
            block_rows.append(pt.concatenate(blocks, axis=1))

        T = pt.concatenate(block_rows, axis=0)
    else:
        # The user is responsible for ensuring the states sum to zero, so T is just a circulant matrix
        # that cycles between the states.
        T = pt.set_subtensor(pt.eye(states_per_endog, k=1)[-1, 0], 1)

    self.ssm["transition", :, :] = pt.linalg.block_diag(*[T for _ in range(n_endog)])

    # Design row: a 1 in the first column, zeros elsewhere.
    Z = pt.zeros((1, states_per_endog))[0, 0].set(1)
    self.ssm["design", :, :] = pt.linalg.block_diag(*[Z for _ in range(n_endog)])

    param_shape = (n_seasons,) if n_endog == 1 else (n_endog, n_seasons)
    initial_states = self.make_and_register_variable(f"params_{self.name}", shape=param_shape)

    # Each seasonal parameter is repeated ``duration`` times to fill the state vector.
    if n_endog == 1:
        x0 = pt.extra_ops.repeat(initial_states, d, axis=0)
    else:
        x0 = pt.extra_ops.repeat(initial_states, d, axis=1).ravel()
    self.ssm["initial_state", :] = x0

    if not self.innovations:
        return

    R = pt.zeros((states_per_endog, shocks_per_endog))[0, 0].set(1.0)
    self.ssm["selection", :, :] = pt.join(0, *[R for _ in range(n_endog)])

    sigma_shape = () if n_endog == 1 else (n_endog,)
    season_sigma = self.make_and_register_variable(f"sigma_{self.name}", shape=sigma_shape)

    diag_rows, diag_cols = np.diag_indices(shocks_per_endog * n_endog)
    self.ssm["state_cov", diag_rows, diag_cols] = season_sigma**2
426
+
427
+
428
+ class FrequencySeasonality(Component):
429
+ r"""
430
+ Seasonal component, modeled in the frequency domain
431
+
432
+ Parameters
433
+ ----------
434
+ season_length: float
435
+ The number of periods in a single seasonal cycle, e.g. 12 for monthly data with annual seasonal pattern, 7 for
436
+ daily data with weekly seasonal pattern, etc. Non-integer ``season_length`` is also permitted, for example
437
+ 365.2422 days in a (solar) year.
438
+
439
+ n: int
440
+ Number of fourier features to include in the seasonal component. Default is ``season_length // 2``, which
441
+ is the maximum possible. A smaller number can be used for a more wave-like seasonal pattern.
442
+
443
+ name: str, default None
444
+ A name for this seasonal component. Used to label dimensions and coordinates. Useful when multiple seasonal
445
+ components are included in the same model. Default is ``f"Frequency[s={season_length}, n={n}]"``
446
+
447
+ innovations: bool, default True
448
+ Whether to include stochastic innovations in the strength of the seasonal effect
449
+
450
+ observed_state_names: list[str] | None, default None
451
+ List of strings for observed state labels. If None, defaults to ["data"].
452
+
453
+ share_states: bool, default False
454
+ Whether latent states are shared across the observed states. If True, there will be only one set of latent
455
+ states, which are observed by all observed states. If False, each observed state has its own set of
456
+ latent states. This argument has no effect if `k_endog` is 1.
457
+
458
+ Notes
459
+ -----
460
+ A seasonal effect is any pattern that repeats every fixed interval. Although there are many possible ways to
461
+ model seasonal effects, the implementation used here is the one described by [1] as the "canonical" frequency domain
462
+ representation. The seasonal component can be expressed:
463
+
464
+ .. math::
465
+ \begin{align}
466
+ \gamma_t &= \sum_{j=1}^{n} \gamma_{j,t} \\
467
+ \gamma_{j, t+1} &= \gamma_{j,t} \cos \lambda_j + \gamma_{j,t}^\star \sin \lambda_j + \omega_{j, t} \\
468
+ \gamma_{j, t+1}^\star &= -\gamma_{j,t} \sin \lambda_j + \gamma_{j,t}^\star \cos \lambda_j + \omega_{j,t}^\star \\
469
+ \lambda_j &= \frac{2\pi j}{s}
470
+ \end{align}
471
+
472
+ Where :math:`s` is the ``season_length``.
473
+
474
+ Unlike a ``TimeSeasonality`` component, a ``FrequencySeasonality`` component does not require integer season
475
+ length. In addition, for long seasonal periods, it is possible to obtain a more compact state space representation
476
+ by choosing ``n << s // 2``. Using ``TimeSeasonality``, an annual seasonal pattern in daily data requires 364
477
+ states, whereas ``FrequencySeasonality`` always requires ``2 * n`` states, regardless of the ``season_length``.
478
+ The price of this compactness is less representational power. At ``n = 1``, the seasonal pattern will be a pure
479
+ sine wave. At ``n = s // 2``, any arbitrary pattern can be represented.
480
+
481
+ One cost of the added flexibility of ``FrequencySeasonality`` is reduced interpretability. States of this model are
482
+ coefficients :math:`\gamma_1, \gamma^\star_1, \gamma_2, \gamma_2^\star ..., \gamma_n, \gamma^\star_n` associated
483
+ with different frequencies in the fourier representation of the seasonal pattern. As a result, it is not possible
484
+ to isolate and identify a "Monday" effect, for instance.
485
+ """
486
+
487
def __init__(
    self,
    season_length: float,
    n: int | None = None,
    name: str | None = None,
    innovations: bool = True,
    observed_state_names: list[str] | None = None,
    share_states: bool = False,
):
    """
    Derive the number of states and coefficients from ``season_length`` and ``n`` and initialise the base
    component.

    Raises
    ------
    ValueError
        If ``n`` (given, or derived as ``int(season_length / 2)``) is smaller than 1.
    """
    if observed_state_names is None:
        observed_state_names = ["data"]

    self.share_states = share_states
    k_endog = len(observed_state_names)

    if n is None:
        n = int(season_length / 2)
    # Fail with a clear message instead of the ZeroDivisionError the saturation test below would raise
    # for n == 0 (e.g. season_length < 2 with n left unspecified).
    if n < 1:
        raise ValueError(
            f"n must be at least 1, got n={n} for season_length={season_length}; "
            "season_length must be at least 2 when n is not given"
        )
    if name is None:
        name = f"Frequency[s={season_length}, n={n}]"

    # Each frequency contributes two states.
    k_states = n * 2
    self.n = n
    self.season_length = season_length
    self.innovations = innovations

    # If the model is completely saturated (n = s // 2), the last state will not be identified, so it shouldn't
    # get a parameter assigned to it and should just be fixed to zero.
    # Test via the ratio (rather than n == s // 2) so that a non-integer season_length, for which s // 2
    # rounds down, is not mistaken for a saturated model.
    self.last_state_not_identified = (self.season_length / self.n) == 2.0
    self.n_coefs = k_states - int(self.last_state_not_identified)

    # Flag every even-indexed state (0, 2, 4, ...) with a 1.
    obs_state_idx = np.zeros(k_states)
    obs_state_idx[slice(0, k_states, 2)] = 1
    obs_state_idx = np.tile(obs_state_idx, 1 if share_states else k_endog)

    super().__init__(
        name=name,
        k_endog=k_endog,
        k_states=k_states if share_states else k_states * k_endog,
        k_posdef=k_states * int(self.innovations)
        if share_states
        else k_states * int(self.innovations) * k_endog,
        share_states=share_states,
        observed_state_names=observed_state_names,
        measurement_error=False,
        combine_hidden_states=True,
        obs_state_idxs=obs_state_idx,
    )
535
+
536
def make_symbolic_graph(self) -> None:
    """
    Write the design, initial-state, transition, selection and state-covariance entries into ``self.ssm``.

    Blocks are built for one set of latent states and repeated block-diagonally once per effective
    observed series (1 when states are shared, ``k_endog`` otherwise).
    """
    k_endog_effective = 1 if self.share_states else self.k_endog

    k_states = self.k_states // k_endog_effective
    k_posdef = self.k_posdef // k_endog_effective
    n_coefs = self.n_coefs

    # Design row: 1 on every even-indexed state, 0 elsewhere.
    Z = pt.zeros((1, k_states))[0, slice(0, k_states, 2)].set(1.0)

    self.ssm["design", :, :] = pt.linalg.block_diag(*[Z for _ in range(k_endog_effective)])

    # Size the parameter by the *effective* number of series so that, once ravelled, it has exactly as
    # many entries as ``init_state_idx`` below (which also iterates over ``k_endog_effective``). Using
    # ``k_endog`` here made the two disagree when states are shared across several observed series.
    init_state = self.make_and_register_variable(
        f"params_{self.name}",
        shape=(n_coefs,) if k_endog_effective == 1 else (k_endog_effective, n_coefs),
    )

    # For each set of states, take the first ``n_coefs`` positions; when ``n_coefs < k_states`` the last
    # state of the set is skipped and receives no parameter.
    init_state_idx = np.concatenate(
        [
            np.arange(k_states * i, (i + 1) * k_states, dtype=int)[:n_coefs]
            for i in range(k_endog_effective)
        ],
        axis=0,
    )

    self.ssm["initial_state", init_state_idx] = init_state.ravel()

    T_mats = [_frequency_transition_block(self.season_length, j + 1) for j in range(self.n)]
    T = pt.linalg.block_diag(*T_mats)
    self.ssm["transition", :, :] = pt.linalg.block_diag(*[T for _ in range(k_endog_effective)])

    if self.innovations:
        sigma_season = self.make_and_register_variable(
            f"sigma_{self.name}", shape=() if k_endog_effective == 1 else (k_endog_effective,)
        )
        self.ssm["selection", :, :] = pt.eye(self.k_states)
        # Each sigma is repeated ``k_posdef`` times, i.e. once per shock of its own set of states.
        self.ssm["state_cov", :, :] = pt.eye(self.k_posdef) * pt.repeat(
            sigma_season**2, k_posdef
        )
574
+
575
def populate_component_properties(self):
    """
    Populate state names, parameter names/shapes/dims, coordinates and shock names.

    Shapes and dims are keyed on the effective number of observed series (1 when ``share_states`` is True,
    ``k_endog`` otherwise).
    """
    k_endog = self.k_endog
    k_endog_effective = 1 if self.share_states else k_endog
    n_coefs = self.n_coefs
    multi_endog = k_endog_effective > 1

    # One "Cos" and one "Sin" label per frequency, in that order.
    base_names = [f"{f}_{i}_{self.name}" for i in range(self.n) for f in ["Cos", "Sin"]]

    if self.share_states:
        self.state_names = [f"{name}[shared]" for name in base_names]
    else:
        self.state_names = [
            f"{name}[{obs_state_name}]"
            for obs_state_name in self.observed_state_names
            for name in base_names
        ]

    # Trim state names if the model is saturated
    param_state_names = base_names[:n_coefs]

    params_name = f"params_{self.name}"
    endog_dim = f"endog_{self.name}"
    state_dim = f"state_{self.name}"
    params_dims = (endog_dim, state_dim) if multi_endog else (state_dim,)

    self.param_names = [params_name]
    self.param_dims = {params_name: params_dims}
    self.param_info = {
        params_name: {
            "shape": (k_endog_effective, n_coefs) if multi_endog else (n_coefs,),
            "constraints": None,
            "dims": params_dims,
        }
    }

    # NOTE(review): this branch is keyed on ``k_endog`` rather than ``k_endog_effective``, so an endog
    # coordinate is also emitted when states are shared (where no parameter uses it). Kept as-is.
    self.coords = (
        {state_dim: param_state_names}
        if k_endog == 1
        else {
            endog_dim: self.observed_state_names,
            state_dim: param_state_names,
        }
    )

    if self.innovations:
        sigma_name = f"sigma_{self.name}"
        self.param_names += [sigma_name]
        self.shock_names = self.state_names.copy()
        self.param_info[sigma_name] = {
            # One sigma per effective series: this matches the single ``endog`` dim declared below and the
            # ``(k_endog_effective,)`` shape make_symbolic_graph registers for this variable.
            "shape": (k_endog_effective,) if multi_endog else (),
            "constraints": "Positive",
            "dims": (endog_dim,) if multi_endog else None,
        }
        if multi_endog:
            self.param_dims[sigma_name] = (endog_dim,)